1
0
mirror of synced 2025-12-23 21:03:15 -05:00

fix(source-microsoft-sharepoint): handle wrong folder name (#45646)

Signed-off-by: Artem Inzhyyants <artem.inzhyyants@gmail.com>
This commit is contained in:
Artem Inzhyyants
2024-09-19 16:51:18 +02:00
committed by GitHub
parent 39caa99fa1
commit 009265a1d8
6 changed files with 46 additions and 31 deletions

View File

@@ -27,9 +27,11 @@ acceptance_tests:
tests:
- config_path: "secrets/config.json"
configured_catalog_path: "integration_tests/configured_catalog.json"
timeout_seconds: 3600
future_state:
future_state_path: "integration_tests/abnormal_state.json"
full_refresh:
tests:
- config_path: "secrets/config.json"
configured_catalog_path: "integration_tests/configured_catalog.json"
timeout_seconds: 3600

View File

@@ -20,7 +20,7 @@ data:
connectorSubtype: file
connectorType: source
definitionId: 59353119-f0f2-4e5a-a8ba-15d887bc34f6
dockerImageTag: 0.5.1
dockerImageTag: 0.5.2
dockerRepository: airbyte/source-microsoft-sharepoint
githubIssueLabel: source-microsoft-sharepoint
icon: microsoft-sharepoint.svg

View File

@@ -3,7 +3,7 @@ requires = [ "poetry-core>=1.0.0",]
build-backend = "poetry.core.masonry.api"
[tool.poetry]
version = "0.5.1"
version = "0.5.2"
name = "source-microsoft-sharepoint"
description = "Source implementation for Microsoft SharePoint."
authors = [ "Airbyte <contact@airbyte.io>",]

View File

@@ -18,7 +18,7 @@ from msal import ConfidentialClientApplication
from office365.graph_client import GraphClient
from source_microsoft_sharepoint.spec import SourceMicrosoftSharePointSpec
from .utils import MicrosoftSharePointRemoteFile, execute_query_with_retry, filter_http_urls
from .utils import FolderNotFoundException, MicrosoftSharePointRemoteFile, execute_query_with_retry, filter_http_urls
class SourceMicrosoftSharePointClient:
@@ -187,7 +187,10 @@ class SourceMicrosoftSharePointStreamReader(AbstractFileBasedStreamReader):
folder = drive.root
folder_path_url = drive.web_url
else:
folder = execute_query_with_retry(drive.root.get_by_path(folder_path).get())
try:
folder = execute_query_with_retry(drive.root.get_by_path(folder_path).get())
except FolderNotFoundException:
continue
folder_path_url = drive.web_url + "/" + folder_path
yield from self._list_directories_and_files(folder, folder_path_url)

View File

@@ -1,6 +1,5 @@
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
import logging
import time
from datetime import datetime
from enum import Enum
@@ -9,6 +8,8 @@ from http import HTTPStatus
from airbyte_cdk import AirbyteTracedException, FailureType
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
LOGGER = logging.getLogger("airbyte")
class SearchScope(Enum):
OWN_DRIVES = "OWN_DRIVES"
@@ -16,6 +17,10 @@ class SearchScope(Enum):
BOTH = "BOTH"
# Signals that a requested SharePoint item/folder could not be found.
# execute_query_with_retry raises it (after logging a warning) when the failed
# request's response status is HTTPStatus.NOT_FOUND; the stream reader catches
# it around the folder lookup and `continue`s instead of failing.
class FolderNotFoundException(Exception):
pass
class MicrosoftSharePointRemoteFile(RemoteFile):
download_url: str
@@ -77,6 +82,10 @@ def execute_query_with_retry(obj, max_retries=5, initial_retry_after=5, max_retr
time.sleep(retry_after)
retries += 1
retry_after = min(retry_after * 2, max_retry_after) # Double the wait time for next retry, up to a max limit
elif hasattr(ex, "response") and ex.response.status_code == HTTPStatus.NOT_FOUND:
error_message = f"Requested item/folder could not be found: url: {ex.response.url}"
LOGGER.warning(error_message)
raise FolderNotFoundException(error_message)
else:
# Re-raise exceptions that are not related to rate limits or service availability
raise AirbyteTracedException.from_exception(ex, message="Caught unexpected exception")