feat(source-slack): add custom api budget for threads and channel messages streams (#64553)
Co-authored-by: Octavia Squidington III <octavia-squidington-iii@users.noreply.github.com> Co-authored-by: Kat Wilson <40400595+katmarkham@users.noreply.github.com>
This commit is contained in:
@@ -10,7 +10,7 @@ data:
|
||||
connectorSubtype: api
|
||||
connectorType: source
|
||||
definitionId: c2281cee-86f9-4a86-bb48-d23286b4c7bd
|
||||
dockerImageTag: 2.2.0-rc.5
|
||||
dockerImageTag: 2.2.0-rc.6
|
||||
dockerRepository: airbyte/source-slack
|
||||
documentationUrl: https://docs.airbyte.com/integrations/sources/slack
|
||||
githubIssueLabel: source-slack
|
||||
|
||||
@@ -3,7 +3,7 @@ requires = [ "poetry-core>=1.0.0",]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
|
||||
[tool.poetry]
|
||||
version = "2.2.0-rc.5"
|
||||
version = "2.2.0-rc.6"
|
||||
name = "source-slack"
|
||||
description = "Source implementation for Slack."
|
||||
authors = [ "Airbyte <contact@airbyte.io>",]
|
||||
|
||||
@@ -0,0 +1,102 @@
|
||||
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
||||
|
||||
from dataclasses import dataclass
|
||||
from datetime import timedelta
|
||||
from typing import Any, Dict, Mapping
|
||||
|
||||
from airbyte_cdk.sources.declarative.auth.declarative_authenticator import (
|
||||
NoAuth,
|
||||
)
|
||||
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import (
|
||||
InterpolatedString,
|
||||
)
|
||||
from airbyte_cdk.sources.declarative.requesters import HttpRequester
|
||||
from airbyte_cdk.sources.declarative.requesters.request_options.interpolated_request_options_provider import (
|
||||
InterpolatedRequestOptionsProvider,
|
||||
)
|
||||
from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod
|
||||
from airbyte_cdk.sources.streams.call_rate import (
|
||||
APIBudget,
|
||||
HttpRequestMatcher,
|
||||
LimiterMixin,
|
||||
MovingWindowCallRatePolicy,
|
||||
Rate,
|
||||
UnlimitedCallRatePolicy,
|
||||
)
|
||||
from airbyte_cdk.sources.streams.http import HttpClient
|
||||
from airbyte_cdk.sources.types import EmptyString
|
||||
|
||||
|
||||
MESSAGES_AND_THREADS_RATE = Rate(limit=1, interval=timedelta(seconds=60))
|
||||
|
||||
|
||||
class MessagesAndThreadsApiBudget(APIBudget, LimiterMixin):
|
||||
"""
|
||||
Switches to MovingWindowCallRatePolicy 1 request per minute if rate limits were exceeded.
|
||||
"""
|
||||
|
||||
def update_from_response(self, request: Any, response: Any) -> None:
|
||||
current_policy = self.get_matching_policy(request)
|
||||
if response.status_code == 429 and isinstance(current_policy, UnlimitedCallRatePolicy):
|
||||
matchers = current_policy._matchers
|
||||
self._policies = [
|
||||
MovingWindowCallRatePolicy(
|
||||
matchers=matchers,
|
||||
rates=[MESSAGES_AND_THREADS_RATE],
|
||||
)
|
||||
]
|
||||
|
||||
|
||||
@dataclass
|
||||
class MessagesAndThreadsHttpRequester(HttpRequester):
|
||||
"""
|
||||
Redefines Custom API Budget to handle rate limits.
|
||||
"""
|
||||
|
||||
url_match: str = None
|
||||
# redefine this here to set up in InterpolatedRequestOptionsProvider in __post_init__
|
||||
request_parameters: Dict[str, Any] = None
|
||||
|
||||
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
||||
self._url = InterpolatedString.create(self.url if self.url else EmptyString, parameters=parameters)
|
||||
# deprecated
|
||||
self._url_base = InterpolatedString.create(self.url_base if self.url_base else EmptyString, parameters=parameters)
|
||||
# deprecated
|
||||
self._path = InterpolatedString.create(self.path if self.path else EmptyString, parameters=parameters)
|
||||
if self.request_options_provider is None:
|
||||
self._request_options_provider = InterpolatedRequestOptionsProvider(
|
||||
config=self.config,
|
||||
parameters=parameters,
|
||||
request_parameters=self.request_parameters,
|
||||
)
|
||||
elif isinstance(self.request_options_provider, dict):
|
||||
self._request_options_provider = InterpolatedRequestOptionsProvider(config=self.config, **self.request_options_provider)
|
||||
else:
|
||||
self._request_options_provider = self.request_options_provider
|
||||
self._authenticator = self.authenticator or NoAuth(parameters=parameters)
|
||||
self._http_method = HttpMethod[self.http_method] if isinstance(self.http_method, str) else self.http_method
|
||||
self.error_handler = self.error_handler
|
||||
self._parameters = parameters
|
||||
|
||||
if self.error_handler is not None and hasattr(self.error_handler, "backoff_strategies"):
|
||||
backoff_strategies = self.error_handler.backoff_strategies # type: ignore
|
||||
else:
|
||||
backoff_strategies = None
|
||||
|
||||
self._http_client = HttpClient(
|
||||
name=self.name,
|
||||
logger=self.logger,
|
||||
error_handler=self.error_handler,
|
||||
api_budget=MessagesAndThreadsApiBudget(
|
||||
policies=[
|
||||
UnlimitedCallRatePolicy(
|
||||
matchers=[HttpRequestMatcher(url=self.url_match)],
|
||||
)
|
||||
]
|
||||
),
|
||||
authenticator=self._authenticator,
|
||||
use_cache=self.use_cache,
|
||||
backoff_strategy=backoff_strategies,
|
||||
disable_retries=self.disable_retries,
|
||||
message_repository=self.message_repository,
|
||||
)
|
||||
@@ -1,28 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
||||
#
|
||||
import logging
|
||||
from typing import Optional, Union
|
||||
|
||||
from requests import RequestException, Response
|
||||
|
||||
from airbyte_cdk.sources.streams.http.error_handlers import BackoffStrategy
|
||||
|
||||
|
||||
class SlackBackoffStrategy(BackoffStrategy):
|
||||
def __init__(self, logger: logging.Logger):
|
||||
self.logger = logger
|
||||
|
||||
def backoff_time(self, response_or_exception: Optional[Union[Response, RequestException]], **kwargs) -> Optional[float]:
|
||||
"""
|
||||
This method is called if we run into the rate limit.
|
||||
Slack puts the retry time in the `Retry-After` response header so we
|
||||
we return that value. If the response is anything other than a 429 (e.g: 5XX)
|
||||
fall back on default retry behavior.
|
||||
Rate Limits Docs: https://api.slack.com/docs/rate-limits#web
|
||||
"""
|
||||
if isinstance(response_or_exception, Response) and "Retry-After" in response_or_exception.headers:
|
||||
return int(response_or_exception.headers["Retry-After"])
|
||||
else:
|
||||
self.logger.info("Retry-after header not found. Using default backoff value")
|
||||
return 5
|
||||
@@ -31,6 +31,13 @@ definitions:
|
||||
type: BearerAuthenticator
|
||||
api_token: "{{ config['credentials']['access_token'] }}"
|
||||
|
||||
authenticator:
|
||||
type: SelectiveAuthenticator
|
||||
authenticator_selection_path: ["credentials", "option_title"]
|
||||
authenticators:
|
||||
Default OAuth2.0 authorization: "#/definitions/access_token_auth"
|
||||
API Token Credentials: "#/definitions/api_token_auth"
|
||||
|
||||
requester:
|
||||
type: HttpRequester
|
||||
url_base: https://slack.com/api/
|
||||
@@ -200,15 +207,26 @@ definitions:
|
||||
retriever:
|
||||
$ref: "#/definitions/retriever"
|
||||
requester:
|
||||
$ref: "#/definitions/requester"
|
||||
use_cache: true
|
||||
type: CustomRequester
|
||||
class_name: "source_slack.components.slack_api_budget.MessagesAndThreadsHttpRequester"
|
||||
url_match: "https://slack.com/api/conversations.history?.+"
|
||||
url_base: https://slack.com/api/
|
||||
path: "{{ parameters['path'] }}"
|
||||
http_method: GET
|
||||
request_parameters:
|
||||
inclusive: "True"
|
||||
request_headers: {}
|
||||
authenticator:
|
||||
$ref: "#/definitions/authenticator"
|
||||
request_body_json: {}
|
||||
error_handler:
|
||||
type: DefaultErrorHandler
|
||||
backoff_strategies:
|
||||
- type: "WaitTimeFromHeader"
|
||||
header: "retry-after"
|
||||
- type: "WaitTimeFromHeader"
|
||||
header: "Retry-After"
|
||||
use_cache: true
|
||||
record_selector:
|
||||
$ref: "#/definitions/selector"
|
||||
paginator:
|
||||
@@ -275,9 +293,18 @@ definitions:
|
||||
retriever:
|
||||
$ref: "#/definitions/retriever"
|
||||
requester:
|
||||
$ref: "#/definitions/requester"
|
||||
type: CustomRequester
|
||||
class_name: "source_slack.components.slack_api_budget.MessagesAndThreadsHttpRequester"
|
||||
url_match: "https://slack.com/api/conversations.replies?.+"
|
||||
url_base: https://slack.com/api/
|
||||
path: "{{ parameters['path'] }}"
|
||||
http_method: GET
|
||||
request_parameters:
|
||||
channel: "{{ stream_partition['parent_slice']['channel'] }}"
|
||||
request_headers: {}
|
||||
authenticator:
|
||||
$ref: "#/definitions/authenticator"
|
||||
request_body_json: {}
|
||||
error_handler:
|
||||
type: DefaultErrorHandler
|
||||
max_retries: 20
|
||||
|
||||
@@ -7,15 +7,17 @@ import pytest
|
||||
from source_slack import SourceSlack
|
||||
from source_slack.components.channel_members_extractor import ChannelMembersExtractor
|
||||
from source_slack.components.join_channels import ChannelsRetriever, JoinChannelsStream
|
||||
from source_slack.components.threads_partition_router import ThreadsPartitionRouter
|
||||
|
||||
from airbyte_cdk.models import SyncMode
|
||||
from airbyte_cdk.sources.declarative.extractors import DpathExtractor, RecordSelector
|
||||
from airbyte_cdk.sources.declarative.requesters import HttpRequester
|
||||
from airbyte_cdk.sources.streams.call_rate import MovingWindowCallRatePolicy, UnlimitedCallRatePolicy
|
||||
from airbyte_cdk.sources.streams.http.requests_native_auth import TokenAuthenticator
|
||||
|
||||
|
||||
def get_stream_by_name(stream_name, config):
|
||||
streams = SourceSlack().streams(config=config)
|
||||
streams = SourceSlack(catalog={}, config=config, state={}).streams(config=config)
|
||||
for stream in streams:
|
||||
if stream.name == stream_name:
|
||||
return stream
|
||||
@@ -95,3 +97,97 @@ def test_join_channel_read(requests_mock, token_config, joined_channel, caplog,
|
||||
assert mocked_request.called
|
||||
assert mocked_request.last_request._request.body == b'{"channel": "channel 2"}'
|
||||
assert log_message in caplog.text
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"threads_stream_state, expected_parent_state",
|
||||
(
|
||||
({}, {}),
|
||||
(
|
||||
{"float_ts": 7270247822.0},
|
||||
# lookback window applied
|
||||
{"float_ts": 7270161422.0},
|
||||
),
|
||||
(
|
||||
{
|
||||
"states": [
|
||||
{
|
||||
"partition": {"float_ts": "1683104542.931169", "parent_slice": {"channel": "C04KX3KEZ54", "parent_slice": {}}},
|
||||
"cursor": {"float_ts": "1753263869"},
|
||||
},
|
||||
{
|
||||
"partition": {"float_ts": "1683104590.931169", "parent_slice": {"channel": "C04KX3KEZ54", "parent_slice": {}}},
|
||||
"cursor": {"float_ts": "1753263870"},
|
||||
},
|
||||
{
|
||||
"partition": {"float_ts": "1683104590.931169", "parent_slice": {"channel": "C04KX3KEZ54", "parent_slice": {}}},
|
||||
"cursor": {"float_ts": "1753263849"},
|
||||
},
|
||||
]
|
||||
},
|
||||
# lookback window applied
|
||||
{"float_ts": 1753177470.0},
|
||||
),
|
||||
),
|
||||
ids=["no_state", "old_format_state", "new_format_state"],
|
||||
)
|
||||
def test_threads_partition_router(token_config, threads_stream_state, expected_parent_state):
|
||||
stream = get_stream_by_name("threads", token_config)
|
||||
threads_partition_router = stream.retriever.stream_slicer._partition_router
|
||||
threads_partition_router.set_initial_state(stream_state=threads_stream_state)
|
||||
assert threads_partition_router.parent_stream_configs[0].stream.state["state"] == expected_parent_state
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"response_status_code, api_response, expected_policy",
|
||||
(
|
||||
(
|
||||
429,
|
||||
[
|
||||
# first call rate limited
|
||||
{"headers": {"Retry-After": "1"}, "text": "rate limited", "status_code": 429},
|
||||
# refreshed limits on second call
|
||||
{"json": {"messages": []}, "status_code": 200},
|
||||
],
|
||||
MovingWindowCallRatePolicy,
|
||||
),
|
||||
(
|
||||
200,
|
||||
[
|
||||
# no rate limits
|
||||
{"json": {"messages": []}, "status_code": 200},
|
||||
],
|
||||
UnlimitedCallRatePolicy,
|
||||
),
|
||||
),
|
||||
ids=["rate_limited_policy", "no_rate_limits_policy"],
|
||||
)
|
||||
def test_threads_and_messages_api_budget(response_status_code, api_response, expected_policy, token_config, requests_mock):
|
||||
stream = get_stream_by_name("threads", token_config)
|
||||
assert len(stream.retriever.requester._http_client._api_budget._policies) == 1
|
||||
assert isinstance(stream.retriever.requester._http_client._api_budget._policies[0], UnlimitedCallRatePolicy)
|
||||
|
||||
messages = [{"ts": 1577866844}, {"ts": 1577877406}]
|
||||
|
||||
requests_mock.register_uri(
|
||||
"GET",
|
||||
"https://slack.com/api/conversations.replies",
|
||||
api_response,
|
||||
)
|
||||
requests_mock.register_uri(
|
||||
"GET",
|
||||
"https://slack.com/api/conversations.history?limit=1000&channel=airbyte-for-beginners",
|
||||
[{"json": {"messages": messages}}, {"json": {"messages": []}}],
|
||||
)
|
||||
requests_mock.register_uri(
|
||||
"GET",
|
||||
"https://slack.com/api/conversations.history?limit=1000&channel=good-reads",
|
||||
[{"json": {"messages": messages}}, {"json": {"messages": []}}],
|
||||
)
|
||||
|
||||
stream_slice = list(stream.stream_slices(sync_mode=SyncMode.incremental, stream_state={}))[0]
|
||||
|
||||
list(stream.retriever.read_records(records_schema={}, stream_slice=stream_slice))
|
||||
|
||||
assert len(stream.retriever.requester._http_client._api_budget._policies) == 1
|
||||
assert isinstance(stream.retriever.requester._http_client._api_budget._policies[0], expected_policy)
|
||||
|
||||
@@ -23,6 +23,10 @@ The following instructions guide you through creating a Slack app. Airbyte can o
|
||||
If you are using a legacy Slack API Key, you can skip this section.
|
||||
:::
|
||||
|
||||
:::warning
|
||||
Source Slack has a different API Budget Requests Policy for the Channel Messages and Threads streams (one request per minute). We suggest you create a separate connection for those streams so that you do not experience slower syncs across all Slack streams. Please visit [Rate limiting section](https://docs.airbyte.com/integrations/sources/slack#rate-limiting) for more info.
|
||||
:::
|
||||
|
||||
To create a Slack App, read this [tutorial](https://api.slack.com/tutorials/tracks/getting-a-token) on how to create an app, or follow these instructions.
|
||||
|
||||
1. Go to your [Apps](https://api.slack.com/apps)
|
||||
@@ -150,10 +154,14 @@ Expand to see details about Slack connector limitations and troubleshooting.
|
||||
|
||||
Slack has [rate limit restrictions](https://api.slack.com/docs/rate-limits).
|
||||
|
||||
###### Rate Limits for Channel Messages and Threads streams:
|
||||
Slack API [rate limits](https://api.slack.com/changelog/2025-05-terms-rate-limit-update-and-faq#what) for the [conversations.replies](https://api.slack.com/methods/conversations.replies) and [conversations.history](https://api.slack.com/methods/conversations.history) endpoints are now limited to one request per minute. Due to the new Source Slack policy, syncs of Channel Messages and Threads streams can be slow. If you want to sync data from Users, Channels and Channel Members streams in Source Slack more quickly, we recommended you create a separate connection for Channel Messages and Threads streams and other streams which observe a different rate limit policy. Please note that Users, Channels and Channel Members streams are not being read with the one request per minute policy, so their read time is only depends on how much data should be extracted.
|
||||
|
||||
### Troubleshooting
|
||||
|
||||
- Check out common troubleshooting issues for the Slack source connector on our Airbyte Forum [here](https://github.com/airbytehq/airbyte/discussions).
|
||||
|
||||
|
||||
</details>
|
||||
|
||||
</HideInUI>
|
||||
@@ -166,6 +174,7 @@ Slack has [rate limit restrictions](https://api.slack.com/docs/rate-limits).
|
||||
|
||||
| Version | Date | Pull Request | Subject |
|
||||
|:-----------|:-----------|:---------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| 2.2.0-rc.6 | 2025-08-14 | [64553](https://github.com/airbytehq/airbyte/pull/64553) | Add API budget for Threads and Channel Messages streams. |
|
||||
| 2.2.0-rc.5 | 2025-08-06 | [64530](https://github.com/airbytehq/airbyte/pull/64530) | Set use_cache = true for Channels and Channel Messages streams. |
|
||||
| 2.2.0-rc.4 | 2025-08-04 | [64486](https://github.com/airbytehq/airbyte/pull/64486) | Add backoff strategy for Channels stream. |
|
||||
| 2.2.0-rc.3 | 2025-07-29 | [64107](https://github.com/airbytehq/airbyte/pull/64107) | Add custom partition router. |
|
||||
|
||||
Reference in New Issue
Block a user