1
0
mirror of synced 2025-12-31 06:05:12 -05:00
Files
airbyte/airbyte-integrations/connectors/source-intercom/source_intercom/source.py

455 lines
17 KiB
Python
Executable File

#
# Copyright (c) 2021 Airbyte, Inc., all rights reserved.
#
from abc import ABC
from datetime import datetime
from enum import Enum
from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple
from urllib.parse import parse_qsl, urlparse
import requests
from airbyte_cdk.logger import AirbyteLogger
from airbyte_cdk.sources import AbstractSource
from airbyte_cdk.sources.streams import Stream
from airbyte_cdk.sources.streams.http import HttpStream
from airbyte_cdk.sources.streams.http.requests_native_auth import TokenAuthenticator
from requests.auth import AuthBase
class IntercomStream(HttpStream, ABC):
url_base = "https://api.intercom.io/"
primary_key = "id"
data_fields = ["data"]
def __init__(
self,
authenticator: AuthBase,
start_date: str = None,
**kwargs,
):
self.start_date = start_date
super().__init__(authenticator=authenticator)
@property
def authenticator(self):
"""
Fix of the bug when isinstance(authenticator, AuthBase) and
default logic returns incorrect authenticator values
"""
if self._session.auth:
return self._session.auth
return super().authenticator
def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
"""
Abstract method of HttpStream - should be overwritten.
Returning None means there are no more pages to read in response.
"""
next_page = response.json().get("pages", {}).get("next")
if next_page:
return dict(parse_qsl(urlparse(next_page).query))
def request_params(self, next_page_token: Mapping[str, Any] = None, **kwargs) -> MutableMapping[str, Any]:
params = {}
if next_page_token:
params.update(**next_page_token)
return params
def request_headers(self, **kwargs) -> Mapping[str, Any]:
return {"Accept": "application/json"}
def read_records(self, *args, **kwargs) -> Iterable[Mapping[str, Any]]:
try:
yield from super().read_records(*args, **kwargs)
except requests.exceptions.HTTPError as e:
error_message = e.response.text
if error_message:
self.logger.error(f"Stream {self.name}: {e.response.status_code} " f"{e.response.reason} - {error_message}")
raise e
def parse_response(self, response: requests.Response, stream_state: Mapping[str, Any], **kwargs) -> Iterable[Mapping]:
data = response.json()
for data_field in self.data_fields:
if data_field not in data:
continue
data = data[data_field]
if data and isinstance(data, list):
break
if isinstance(data, dict):
yield data
else:
yield from data
class IncrementalIntercomStream(IntercomStream, ABC):
cursor_field = "updated_at"
def __init__(self, authenticator: AuthBase, start_date: str = None, **kwargs):
super().__init__(authenticator, start_date, **kwargs)
self.has_old_records = False
def filter_by_state(self, stream_state: Mapping[str, Any] = None, record: Mapping[str, Any] = None) -> Iterable:
"""
Endpoint does not provide query filtering params, but they provide us
updated_at field in most cases, so we used that as incremental filtering
during the slicing.
"""
if not stream_state or record[self.cursor_field] > stream_state.get(self.cursor_field):
yield record
else:
self.has_old_records = True
def parse_response(self, response: requests.Response, stream_state: Mapping[str, Any], **kwargs) -> Iterable[Mapping]:
record = super().parse_response(response, stream_state, **kwargs)
for record in record:
yield from self.filter_by_state(stream_state=stream_state, record=record)
def get_updated_state(self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]) -> Mapping[str, any]:
"""
This method is called once for each record returned from the API to
compare the cursor field value in that record with the current state
we then return an updated state object. If this is the first time we
run a sync or no state was passed, current_stream_state will be None.
"""
current_stream_state = current_stream_state or {}
current_stream_state_date = current_stream_state.get(self.cursor_field, self.start_date)
latest_record_date = latest_record.get(self.cursor_field, self.start_date)
return {self.cursor_field: max(current_stream_state_date, latest_record_date)}
class ChildStreamMixin:
parent_stream_class: Optional[IntercomStream] = None
def stream_slices(self, sync_mode, **kwargs) -> Iterable[Optional[Mapping[str, any]]]:
parent_stream = self.parent_stream_class(authenticator=self.authenticator, start_date=self.start_date)
for slice in parent_stream.stream_slices(sync_mode=sync_mode):
for item in self.parent_stream_class(
authenticator=self.authenticator, start_date=self.start_date, stream_slice=slice
).read_records(sync_mode=sync_mode):
yield {"id": item["id"]}
class Admins(IntercomStream):
"""Return list of all admins.
API Docs: https://developers.intercom.com/intercom-api-reference/reference#list-admins
Endpoint: https://api.intercom.io/admins
"""
data_fields = ["admins"]
def path(self, **kwargs) -> str:
return "admins"
class Companies(IncrementalIntercomStream):
"""Return list of all companies.
The Intercom API provides 2 similar endpoint for loading of companies:
1) "standard" - https://developers.intercom.com/intercom-api-reference/reference#list-companies.
But this endpoint does not work well for huge datasets and can have performance problems.
2) "scroll" - https://developers.intercom.com/intercom-api-reference/reference#iterating-over-all-companies
It has good performance but at same time only one script/client can use it across the client's entire account.
According to above circumstances no one endpoint can't be used permanently. That's why this stream tries can
apply both endpoints according to the following logic:
1) By default the stream tries to load data by "scroll" endpoint.
2) Try to wait a "scroll" request within a minute (3 attempts with delay 20,5 seconds)
if a "stroll" is busy by another script
3) Switch to using of the "standard" endpoint.
"""
class EndpointType(Enum):
scroll = "companies/scroll"
standard = "companies"
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self._backoff_count = 0
self._use_standard = False
self._endpoint_type = self.EndpointType.scroll
self._total_count = None # uses for saving of a total_count value once
def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
"""For reset scroll needs to iterate pages untill the last.
Another way need wait 1 min for the scroll to expire to get a new list for companies segments."""
data = response.json()
if self._total_count is None and data.get("total_count"):
self._total_count = data["total_count"]
self.logger.info(f"found {self._total_count} companies")
if self.can_use_scroll():
scroll_param = data.get("scroll_param")
# this stream always has only one data field
data_field = self.data_fields[0]
if scroll_param and data.get(data_field):
return {"scroll_param": scroll_param}
elif not data.get("errors"):
return super().next_page_token(response)
return None
def need_use_standard(self):
return not self.can_use_scroll() or self._use_standard
def can_use_scroll(self):
"""Check backoff count"""
return self._backoff_count <= 3
def path(self, **kwargs) -> str:
return self._endpoint_type.value
@classmethod
def check_exists_scroll(cls, response: requests.Response) -> bool:
if response.status_code in [400, 404]:
# example response:
# {..., "errors": [{'code': 'scroll_exists', 'message': 'scroll already exists for this workspace'}]}
# {..., "errors": [{'code': 'not_found', 'message':'scroll parameter not found'}]}
err_body = response.json()["errors"][0]
if err_body["code"] in ["scroll_exists", "not_found"]:
return True
return False
@property
def raise_on_http_errors(self) -> bool:
if self.need_use_standard() and self._endpoint_type == self.EndpointType.scroll:
return False
return True
def stream_slices(self, sync_mode, **kwargs) -> Iterable[Optional[Mapping[str, any]]]:
yield None
if self.need_use_standard():
self._endpoint_type = self.EndpointType.standard
yield None
def should_retry(self, response: requests.Response) -> bool:
if self.check_exists_scroll(response):
self._backoff_count += 1
if self.need_use_standard():
self.logger.error(
"Can't create a new scroll request within an minute or scroll param was expired. "
"Let's try to use a standard non-scroll endpoint."
)
return False
return True
return super().should_retry(response)
def backoff_time(self, response: requests.Response) -> Optional[float]:
if response.status_code == 404:
self._use_standard = True
# Need return value greater than zero to use UserDefinedBackoffException class
return 0.01
if self.check_exists_scroll(response):
self.logger.warning("A previous scroll request is exists. " "It must be deleted within an minute automatically")
# try to check 3 times
return 20.5
return super().backoff_time(response)
def parse_response(self, response: requests.Response, stream_state: Mapping[str, Any], **kwargs) -> Iterable[Mapping]:
if not self.raise_on_http_errors:
data = response.json()
if data.get("errors"):
return
yield from super().parse_response(response, stream_state=stream_state, **kwargs)
class CompanySegments(ChildStreamMixin, IncrementalIntercomStream):
"""Return list of all company segments.
API Docs: https://developers.intercom.com/intercom-api-reference/reference#list-attached-segments-1
Endpoint: https://api.intercom.io/companies/<id>/segments
"""
parent_stream_class = Companies
def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
return f"/companies/{stream_slice['id']}/segments"
class Conversations(IncrementalIntercomStream):
"""Return list of all conversations.
API Docs: https://developers.intercom.com/intercom-api-reference/reference#list-conversations
Endpoint: https://api.intercom.io/conversations
"""
data_fields = ["conversations"]
def request_params(self, next_page_token: Mapping[str, Any] = None, **kwargs) -> MutableMapping[str, Any]:
params = super().request_params(next_page_token, **kwargs)
params.update({"order": "desc", "sort": self.cursor_field})
return params
# We're sorting by desc. Once we hit the first page with an out-of-date result we can stop.
def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
if self.has_old_records:
return None
return super().next_page_token(response)
def path(self, **kwargs) -> str:
return "conversations"
class ConversationParts(ChildStreamMixin, IncrementalIntercomStream):
"""Return list of all conversation parts.
API Docs: https://developers.intercom.com/intercom-api-reference/reference#retrieve-a-conversation
Endpoint: https://api.intercom.io/conversations/<id>
"""
data_fields = ["conversation_parts", "conversation_parts"]
parent_stream_class = Conversations
def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
return f"/conversations/{stream_slice['id']}"
class Segments(IncrementalIntercomStream):
"""Return list of all segments.
API Docs: https://developers.intercom.com/intercom-api-reference/reference#list-segments
Endpoint: https://api.intercom.io/segments
"""
data_fields = ["segments"]
def path(self, **kwargs) -> str:
return "segments"
class Contacts(IncrementalIntercomStream):
"""Return list of all contacts.
API Docs: https://developers.intercom.com/intercom-api-reference/reference#list-contacts
Endpoint: https://api.intercom.io/contacts
"""
def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
"""
Abstract method of HttpStream - should be overwritten.
Returning None means there are no more pages to read in response.
"""
next_page = response.json().get("pages", {}).get("next")
if isinstance(next_page, dict):
return {"starting_after": next_page["starting_after"]}
if isinstance(next_page, str):
return super().next_page_token(response)
def path(self, **kwargs) -> str:
return "contacts"
class DataAttributes(IntercomStream):
primary_key = "name"
def path(self, **kwargs) -> str:
return "data_attributes"
class CompanyAttributes(DataAttributes):
"""Return list of all data attributes belonging to a workspace for companies.
API Docs: https://developers.intercom.com/intercom-api-reference/reference#list-data-attributes
Endpoint: https://api.intercom.io/data_attributes?model=company
"""
def request_params(self, **kwargs) -> MutableMapping[str, Any]:
return {"model": "company"}
class ContactAttributes(DataAttributes):
"""Return list of all data attributes belonging to a workspace for contacts.
API Docs: https://developers.intercom.com/intercom-api-reference/reference#list-data-attributes
Endpoint: https://api.intercom.io/data_attributes?model=contact
"""
def request_params(self, **kwargs) -> MutableMapping[str, Any]:
return {"model": "contact"}
class Tags(IntercomStream):
"""Return list of all tags.
API Docs: https://developers.intercom.com/intercom-api-reference/reference#list-tags-for-an-app
Endpoint: https://api.intercom.io/tags
"""
primary_key = "name"
def path(self, **kwargs) -> str:
return "tags"
class Teams(IntercomStream):
"""Return list of all teams.
API Docs: https://developers.intercom.com/intercom-api-reference/reference#list-teams
Endpoint: https://api.intercom.io/teams
"""
primary_key = "name"
data_fields = ["teams"]
def path(self, **kwargs) -> str:
return "teams"
class VersionApiAuthenticator(TokenAuthenticator):
"""Intercom API support its dynamic versions' switching.
But this connector should support only one for any resource account and
it is realised by the additional request header 'Intercom-Version'
Docs: https://developers.intercom.com/building-apps/docs/update-your-api-version#section-selecting-the-version-via-the-developer-hub
"""
relevant_supported_version = "2.2"
def get_auth_header(self) -> Mapping[str, Any]:
headers = super().get_auth_header()
headers["Intercom-Version"] = self.relevant_supported_version
return headers
class SourceIntercom(AbstractSource):
"""
Source Intercom fetch data from messaging platform.
"""
def check_connection(self, logger, config) -> Tuple[bool, any]:
authenticator = VersionApiAuthenticator(token=config["access_token"])
try:
url = f"{IntercomStream.url_base}/tags"
auth_headers = {"Accept": "application/json", **authenticator.get_auth_header()}
session = requests.get(url, headers=auth_headers)
session.raise_for_status()
return True, None
except requests.exceptions.RequestException as e:
return False, e
def streams(self, config: Mapping[str, Any]) -> List[Stream]:
config["start_date"] = datetime.strptime(config["start_date"], "%Y-%m-%dT%H:%M:%SZ").timestamp()
AirbyteLogger().log("INFO", f"Using start_date: {config['start_date']}")
auth = VersionApiAuthenticator(token=config["access_token"])
return [
Admins(authenticator=auth, **config),
Companies(authenticator=auth, **config),
CompanySegments(authenticator=auth, **config),
Conversations(authenticator=auth, **config),
ConversationParts(authenticator=auth, **config),
Contacts(authenticator=auth, **config),
CompanyAttributes(authenticator=auth, **config),
ContactAttributes(authenticator=auth, **config),
Segments(authenticator=auth, **config),
Tags(authenticator=auth, **config),
Teams(authenticator=auth, **config),
]