1
0
mirror of synced 2025-12-19 18:14:56 -05:00

feat(source-slack): add custom api budget for threads and channel messages streams (#64553)

Co-authored-by: Octavia Squidington III <octavia-squidington-iii@users.noreply.github.com>
Co-authored-by: Kat Wilson <40400595+katmarkham@users.noreply.github.com>
This commit is contained in:
Daryna Ishchenko
2025-08-14 20:04:45 +03:00
committed by GitHub
parent b803721903
commit 400cad55a7
7 changed files with 240 additions and 34 deletions

View File

@@ -10,7 +10,7 @@ data:
connectorSubtype: api
connectorType: source
definitionId: c2281cee-86f9-4a86-bb48-d23286b4c7bd
dockerImageTag: 2.2.0-rc.5
dockerImageTag: 2.2.0-rc.6
dockerRepository: airbyte/source-slack
documentationUrl: https://docs.airbyte.com/integrations/sources/slack
githubIssueLabel: source-slack

View File

@@ -3,7 +3,7 @@ requires = [ "poetry-core>=1.0.0",]
build-backend = "poetry.core.masonry.api"
[tool.poetry]
version = "2.2.0-rc.5"
version = "2.2.0-rc.6"
name = "source-slack"
description = "Source implementation for Slack."
authors = [ "Airbyte <contact@airbyte.io>",]

View File

@@ -0,0 +1,102 @@
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
from dataclasses import dataclass
from datetime import timedelta
from typing import Any, Dict, Mapping
from airbyte_cdk.sources.declarative.auth.declarative_authenticator import (
NoAuth,
)
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import (
InterpolatedString,
)
from airbyte_cdk.sources.declarative.requesters import HttpRequester
from airbyte_cdk.sources.declarative.requesters.request_options.interpolated_request_options_provider import (
InterpolatedRequestOptionsProvider,
)
from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod
from airbyte_cdk.sources.streams.call_rate import (
APIBudget,
HttpRequestMatcher,
LimiterMixin,
MovingWindowCallRatePolicy,
Rate,
UnlimitedCallRatePolicy,
)
from airbyte_cdk.sources.streams.http import HttpClient
from airbyte_cdk.sources.types import EmptyString
MESSAGES_AND_THREADS_RATE = Rate(limit=1, interval=timedelta(seconds=60))
class MessagesAndThreadsApiBudget(APIBudget, LimiterMixin):
"""
Switches to MovingWindowCallRatePolicy 1 request per minute if rate limits were exceeded.
"""
def update_from_response(self, request: Any, response: Any) -> None:
current_policy = self.get_matching_policy(request)
if response.status_code == 429 and isinstance(current_policy, UnlimitedCallRatePolicy):
matchers = current_policy._matchers
self._policies = [
MovingWindowCallRatePolicy(
matchers=matchers,
rates=[MESSAGES_AND_THREADS_RATE],
)
]
@dataclass
class MessagesAndThreadsHttpRequester(HttpRequester):
"""
Redefines Custom API Budget to handle rate limits.
"""
url_match: str = None
# redefine this here to set up in InterpolatedRequestOptionsProvider in __post_init__
request_parameters: Dict[str, Any] = None
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
self._url = InterpolatedString.create(self.url if self.url else EmptyString, parameters=parameters)
# deprecated
self._url_base = InterpolatedString.create(self.url_base if self.url_base else EmptyString, parameters=parameters)
# deprecated
self._path = InterpolatedString.create(self.path if self.path else EmptyString, parameters=parameters)
if self.request_options_provider is None:
self._request_options_provider = InterpolatedRequestOptionsProvider(
config=self.config,
parameters=parameters,
request_parameters=self.request_parameters,
)
elif isinstance(self.request_options_provider, dict):
self._request_options_provider = InterpolatedRequestOptionsProvider(config=self.config, **self.request_options_provider)
else:
self._request_options_provider = self.request_options_provider
self._authenticator = self.authenticator or NoAuth(parameters=parameters)
self._http_method = HttpMethod[self.http_method] if isinstance(self.http_method, str) else self.http_method
self.error_handler = self.error_handler
self._parameters = parameters
if self.error_handler is not None and hasattr(self.error_handler, "backoff_strategies"):
backoff_strategies = self.error_handler.backoff_strategies # type: ignore
else:
backoff_strategies = None
self._http_client = HttpClient(
name=self.name,
logger=self.logger,
error_handler=self.error_handler,
api_budget=MessagesAndThreadsApiBudget(
policies=[
UnlimitedCallRatePolicy(
matchers=[HttpRequestMatcher(url=self.url_match)],
)
]
),
authenticator=self._authenticator,
use_cache=self.use_cache,
backoff_strategy=backoff_strategies,
disable_retries=self.disable_retries,
message_repository=self.message_repository,
)

View File

@@ -1,28 +0,0 @@
#
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
#
import logging
from typing import Optional, Union
from requests import RequestException, Response
from airbyte_cdk.sources.streams.http.error_handlers import BackoffStrategy
class SlackBackoffStrategy(BackoffStrategy):
def __init__(self, logger: logging.Logger):
self.logger = logger
def backoff_time(self, response_or_exception: Optional[Union[Response, RequestException]], **kwargs) -> Optional[float]:
"""
This method is called if we run into the rate limit.
Slack puts the retry time in the `Retry-After` response header so we
we return that value. If the response is anything other than a 429 (e.g: 5XX)
fall back on default retry behavior.
Rate Limits Docs: https://api.slack.com/docs/rate-limits#web
"""
if isinstance(response_or_exception, Response) and "Retry-After" in response_or_exception.headers:
return int(response_or_exception.headers["Retry-After"])
else:
self.logger.info("Retry-after header not found. Using default backoff value")
return 5

View File

@@ -31,6 +31,13 @@ definitions:
type: BearerAuthenticator
api_token: "{{ config['credentials']['access_token'] }}"
authenticator:
type: SelectiveAuthenticator
authenticator_selection_path: ["credentials", "option_title"]
authenticators:
Default OAuth2.0 authorization: "#/definitions/access_token_auth"
API Token Credentials: "#/definitions/api_token_auth"
requester:
type: HttpRequester
url_base: https://slack.com/api/
@@ -200,15 +207,26 @@ definitions:
retriever:
$ref: "#/definitions/retriever"
requester:
$ref: "#/definitions/requester"
use_cache: true
type: CustomRequester
class_name: "source_slack.components.slack_api_budget.MessagesAndThreadsHttpRequester"
url_match: "https://slack.com/api/conversations.history?.+"
url_base: https://slack.com/api/
path: "{{ parameters['path'] }}"
http_method: GET
request_parameters:
inclusive: "True"
request_headers: {}
authenticator:
$ref: "#/definitions/authenticator"
request_body_json: {}
error_handler:
type: DefaultErrorHandler
backoff_strategies:
- type: "WaitTimeFromHeader"
header: "retry-after"
- type: "WaitTimeFromHeader"
header: "Retry-After"
use_cache: true
record_selector:
$ref: "#/definitions/selector"
paginator:
@@ -275,9 +293,18 @@ definitions:
retriever:
$ref: "#/definitions/retriever"
requester:
$ref: "#/definitions/requester"
type: CustomRequester
class_name: "source_slack.components.slack_api_budget.MessagesAndThreadsHttpRequester"
url_match: "https://slack.com/api/conversations.replies?.+"
url_base: https://slack.com/api/
path: "{{ parameters['path'] }}"
http_method: GET
request_parameters:
channel: "{{ stream_partition['parent_slice']['channel'] }}"
request_headers: {}
authenticator:
$ref: "#/definitions/authenticator"
request_body_json: {}
error_handler:
type: DefaultErrorHandler
max_retries: 20

View File

@@ -7,15 +7,17 @@ import pytest
from source_slack import SourceSlack
from source_slack.components.channel_members_extractor import ChannelMembersExtractor
from source_slack.components.join_channels import ChannelsRetriever, JoinChannelsStream
from source_slack.components.threads_partition_router import ThreadsPartitionRouter
from airbyte_cdk.models import SyncMode
from airbyte_cdk.sources.declarative.extractors import DpathExtractor, RecordSelector
from airbyte_cdk.sources.declarative.requesters import HttpRequester
from airbyte_cdk.sources.streams.call_rate import MovingWindowCallRatePolicy, UnlimitedCallRatePolicy
from airbyte_cdk.sources.streams.http.requests_native_auth import TokenAuthenticator
def get_stream_by_name(stream_name, config):
streams = SourceSlack().streams(config=config)
streams = SourceSlack(catalog={}, config=config, state={}).streams(config=config)
for stream in streams:
if stream.name == stream_name:
return stream
@@ -95,3 +97,97 @@ def test_join_channel_read(requests_mock, token_config, joined_channel, caplog,
assert mocked_request.called
assert mocked_request.last_request._request.body == b'{"channel": "channel 2"}'
assert log_message in caplog.text
@pytest.mark.parametrize(
"threads_stream_state, expected_parent_state",
(
({}, {}),
(
{"float_ts": 7270247822.0},
# lookback window applied
{"float_ts": 7270161422.0},
),
(
{
"states": [
{
"partition": {"float_ts": "1683104542.931169", "parent_slice": {"channel": "C04KX3KEZ54", "parent_slice": {}}},
"cursor": {"float_ts": "1753263869"},
},
{
"partition": {"float_ts": "1683104590.931169", "parent_slice": {"channel": "C04KX3KEZ54", "parent_slice": {}}},
"cursor": {"float_ts": "1753263870"},
},
{
"partition": {"float_ts": "1683104590.931169", "parent_slice": {"channel": "C04KX3KEZ54", "parent_slice": {}}},
"cursor": {"float_ts": "1753263849"},
},
]
},
# lookback window applied
{"float_ts": 1753177470.0},
),
),
ids=["no_state", "old_format_state", "new_format_state"],
)
def test_threads_partition_router(token_config, threads_stream_state, expected_parent_state):
stream = get_stream_by_name("threads", token_config)
threads_partition_router = stream.retriever.stream_slicer._partition_router
threads_partition_router.set_initial_state(stream_state=threads_stream_state)
assert threads_partition_router.parent_stream_configs[0].stream.state["state"] == expected_parent_state
@pytest.mark.parametrize(
"response_status_code, api_response, expected_policy",
(
(
429,
[
# first call rate limited
{"headers": {"Retry-After": "1"}, "text": "rate limited", "status_code": 429},
# refreshed limits on second call
{"json": {"messages": []}, "status_code": 200},
],
MovingWindowCallRatePolicy,
),
(
200,
[
# no rate limits
{"json": {"messages": []}, "status_code": 200},
],
UnlimitedCallRatePolicy,
),
),
ids=["rate_limited_policy", "no_rate_limits_policy"],
)
def test_threads_and_messages_api_budget(response_status_code, api_response, expected_policy, token_config, requests_mock):
stream = get_stream_by_name("threads", token_config)
assert len(stream.retriever.requester._http_client._api_budget._policies) == 1
assert isinstance(stream.retriever.requester._http_client._api_budget._policies[0], UnlimitedCallRatePolicy)
messages = [{"ts": 1577866844}, {"ts": 1577877406}]
requests_mock.register_uri(
"GET",
"https://slack.com/api/conversations.replies",
api_response,
)
requests_mock.register_uri(
"GET",
"https://slack.com/api/conversations.history?limit=1000&channel=airbyte-for-beginners",
[{"json": {"messages": messages}}, {"json": {"messages": []}}],
)
requests_mock.register_uri(
"GET",
"https://slack.com/api/conversations.history?limit=1000&channel=good-reads",
[{"json": {"messages": messages}}, {"json": {"messages": []}}],
)
stream_slice = list(stream.stream_slices(sync_mode=SyncMode.incremental, stream_state={}))[0]
list(stream.retriever.read_records(records_schema={}, stream_slice=stream_slice))
assert len(stream.retriever.requester._http_client._api_budget._policies) == 1
assert isinstance(stream.retriever.requester._http_client._api_budget._policies[0], expected_policy)

View File

@@ -23,6 +23,10 @@ The following instructions guide you through creating a Slack app. Airbyte can o
If you are using a legacy Slack API Key, you can skip this section.
:::
:::warning
Source Slack has a different API Budget Requests Policy for the Channel Messages and Threads streams (one request per minute). We suggest you create a separate connection for those streams so that you do not experience slower syncs across all Slack streams. Please visit [Rate limiting section](https://docs.airbyte.com/integrations/sources/slack#rate-limiting) for more info.
:::
To create a Slack App, read this [tutorial](https://api.slack.com/tutorials/tracks/getting-a-token) on how to create an app, or follow these instructions.
1. Go to your [Apps](https://api.slack.com/apps)
@@ -150,10 +154,14 @@ Expand to see details about Slack connector limitations and troubleshooting.
Slack has [rate limit restrictions](https://api.slack.com/docs/rate-limits).
###### Rate Limits for Channel Messages and Threads streams:
Slack API [rate limits](https://api.slack.com/changelog/2025-05-terms-rate-limit-update-and-faq#what) for the [conversations.replies](https://api.slack.com/methods/conversations.replies) and [conversations.history](https://api.slack.com/methods/conversations.history) endpoints are now limited to one request per minute. Due to the new Source Slack policy, syncs of Channel Messages and Threads streams can be slow. If you want to sync data from Users, Channels and Channel Members streams in Source Slack more quickly, we recommended you create a separate connection for Channel Messages and Threads streams and other streams which observe a different rate limit policy. Please note that Users, Channels and Channel Members streams are not being read with the one request per minute policy, so their read time is only depends on how much data should be extracted.
### Troubleshooting
- Check out common troubleshooting issues for the Slack source connector on our Airbyte Forum [here](https://github.com/airbytehq/airbyte/discussions).
</details>
</HideInUI>
@@ -166,6 +174,7 @@ Slack has [rate limit restrictions](https://api.slack.com/docs/rate-limits).
| Version | Date | Pull Request | Subject |
|:-----------|:-----------|:---------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| 2.2.0-rc.6 | 2025-08-14 | [64553](https://github.com/airbytehq/airbyte/pull/64553) | Add API budget for Threads and Channel Messages streams. |
| 2.2.0-rc.5 | 2025-08-06 | [64530](https://github.com/airbytehq/airbyte/pull/64530) | Set use_cache = true for Channels and Channel Messages streams. |
| 2.2.0-rc.4 | 2025-08-04 | [64486](https://github.com/airbytehq/airbyte/pull/64486) | Add backoff strategy for Channels stream. |
| 2.2.0-rc.3 | 2025-07-29 | [64107](https://github.com/airbytehq/airbyte/pull/64107) | Add custom partition router. |