1
0
mirror of synced 2025-12-25 02:09:19 -05:00

feat(source-microsoft-sharepoint): Provide ability to sync other sites than Main sharepoint site (#54658)

This commit is contained in:
Aldo Gonzalez
2025-03-14 14:57:55 -06:00
committed by GitHub
parent b5173567ae
commit 0fb16e3974
8 changed files with 74 additions and 11 deletions

View File

@@ -512,6 +512,13 @@
"default": ".",
"order": 4,
"type": "string"
},
"site_url": {
"title": "Site URL",
"description": "Url of SharePoint site to search for files. Leave empty to search in the main site.",
"default": "",
"order": 5,
"type": "string"
}
},
"required": ["streams", "credentials"]

View File

@@ -20,7 +20,7 @@ data:
connectorSubtype: file
connectorType: source
definitionId: 59353119-f0f2-4e5a-a8ba-15d887bc34f6
dockerImageTag: 0.7.2
dockerImageTag: 0.8.0
dockerRepository: airbyte/source-microsoft-sharepoint
githubIssueLabel: source-microsoft-sharepoint
icon: microsoft-sharepoint.svg

View File

@@ -3,7 +3,7 @@ requires = [ "poetry-core>=1.0.0",]
build-backend = "poetry.core.masonry.api"
[tool.poetry]
version = "0.7.2"
version = "0.8.0"
name = "source-microsoft-sharepoint"
description = "Source implementation for Microsoft SharePoint."
authors = [ "Airbyte <contact@airbyte.io>",]

View File

@@ -15,7 +15,7 @@ from source_microsoft_sharepoint.utils import PlaceholderUrlBuilder
class SourceMicrosoftSharePoint(FileBasedSource):
SCOPES = ["offline_access", "Files.Read.All"]
SCOPES = ["offline_access", "Files.Read.All", "Sites.Read.All", "Sites.Selected"]
def __init__(self, catalog: Optional[ConfiguredAirbyteCatalog], config: Optional[Mapping[str, Any]], state: Optional[TState]):
super().__init__(

View File

@@ -111,6 +111,12 @@ class SourceMicrosoftSharePointSpec(AbstractFileBasedSpec, BaseModel):
order=4,
default=".",
)
site_url: str = Field(
title="Site URL",
description="Url of SharePoint site to search for files. Leave empty to search in the main site.",
order=5,
default="",
)
@classmethod
def documentation_url(cls) -> str:

View File

@@ -202,13 +202,29 @@ class SourceMicrosoftSharePointStreamReader(AbstractFileBasedStreamReader):
yield from self._list_directories_and_files(folder, folder_path_url)
def get_site_drive(self):
    """
    Retrieve the drives of the configured SharePoint site.

    When ``config.site_url`` is empty, the drives are queried directly from the
    client (the main site). Otherwise the drives of the site identified by
    ``config.site_url`` are queried.

    Returns:
        The result of executing the drives query through ``execute_query_with_retry``.

    Raises:
        AirbyteTracedException: with ``FailureType.config_error`` when the drives
            cannot be retrieved; the original exception is attached as the cause.
    """
    site_url = self.config.site_url
    try:
        if site_url:
            # Drives of the specific site provided in the config.
            drives_query = self.one_drive_client.sites.get_by_url(site_url).drives.get()
        else:
            # No site configured: fall back to the main site's drives.
            drives_query = self.one_drive_client.drives.get()
        return execute_query_with_retry(drives_query)
    except Exception as ex:
        site = site_url or "default"
        # Chain explicitly so the underlying failure stays visible as __cause__.
        raise AirbyteTracedException(
            f"Failed to retrieve drives from sharepoint {site} site. Error: {str(ex)}", failure_type=FailureType.config_error
        ) from ex
@property
@lru_cache(maxsize=None)
def drives(self):
"""
Retrieves and caches SharePoint drives, including the user's drive based on authentication type.
"""
drives = execute_query_with_retry(self.one_drive_client.drives.get())
drives = self.get_site_drive()
# skip this step for application authentication flow
if self.config.credentials.auth_type != "Client" or (
@@ -299,7 +315,7 @@ class SourceMicrosoftSharePointStreamReader(AbstractFileBasedStreamReader):
def _get_file_transfer_paths(self, file: RemoteFile, local_directory: str) -> List[str]:
preserve_directory_structure = self.preserve_directory_structure()
file_path = file.uri
match = re.search(r"sharepoint\.com/Shared%20Documents(.*)", file_path)
match = re.search(r"sharepoint\.com(?:/sites/[^/]+)?/Shared%20Documents(.*)", file_path)
if match:
file_path = match.group(1)

View File

@@ -39,6 +39,7 @@ def setup_reader_class():
config.start_date = None
config.credentials = Mock()
config.folder_path = "."
config.site_url = ""
config.credentials.auth_type = "Client"
config.search_scope = "ALL"
reader.config = config # Set up the necessary configuration
@@ -193,17 +194,44 @@ def test_open_file(mock_smart_open, file_extension, expected_compression):
@pytest.mark.parametrize(
"file_extension, expected_paths",
"file_uri, file_extension, expected_paths",
[
("txt.gz", {"bytes": ANY, "file_relative_path": "file.txt.gz", "file_url": f"{TEST_LOCAL_DIRECTORY}/file.txt.gz"}),
("txt.bz2", {"bytes": ANY, "file_relative_path": "file.txt.bz2", "file_url": f"{TEST_LOCAL_DIRECTORY}/file.txt.bz2"}),
("txt", {"bytes": ANY, "file_relative_path": "file.txt", "file_url": f"{TEST_LOCAL_DIRECTORY}/file.txt"}),
(
"https://my_favorite_sharepoint.sharepoint.com/Shared%20Documents/file",
"txt.gz",
{"bytes": ANY, "file_relative_path": "file.txt.gz", "file_url": f"{TEST_LOCAL_DIRECTORY}/file.txt.gz"},
),
(
"https://my_favorite_sharepoint.sharepoint.com/Shared%20Documents/file",
"txt.bz2",
{"bytes": ANY, "file_relative_path": "file.txt.bz2", "file_url": f"{TEST_LOCAL_DIRECTORY}/file.txt.bz2"},
),
(
"https://my_favorite_sharepoint.sharepoint.com/Shared%20Documents/file",
"txt",
{"bytes": ANY, "file_relative_path": "file.txt", "file_url": f"{TEST_LOCAL_DIRECTORY}/file.txt"},
),
(
"https://my_favorite_sharepoint.sharepoint.com/sites/NOT_DEFAULT_SITE/Shared%20Documents/file",
"txt.gz",
{"bytes": ANY, "file_relative_path": "file.txt.gz", "file_url": f"{TEST_LOCAL_DIRECTORY}/file.txt.gz"},
),
(
"https://my_favorite_sharepoint.sharepoint.com/sites/NOT_DEFAULT_SITE/Shared%20Documents/file",
"txt.bz2",
{"bytes": ANY, "file_relative_path": "file.txt.bz2", "file_url": f"{TEST_LOCAL_DIRECTORY}/file.txt.bz2"},
),
(
"https://my_favorite_sharepoint.sharepoint.com/sites/NOT_DEFAULT_SITE/Shared%20Documents/file",
"txt",
{"bytes": ANY, "file_relative_path": "file.txt", "file_url": f"{TEST_LOCAL_DIRECTORY}/file.txt"},
),
],
)
@patch("source_microsoft_sharepoint.stream_reader.SourceMicrosoftSharePointStreamReader.get_access_token")
@patch("source_microsoft_sharepoint.stream_reader.requests.get")
@patch("source_microsoft_sharepoint.stream_reader.requests.head")
def test_get_file(mock_requests_head, mock_requests_get, mock_get_access_token, file_extension, expected_paths):
def test_get_file(mock_requests_head, mock_requests_get, mock_get_access_token, file_uri, file_extension, expected_paths):
"""
Test the get_file method in SourceMicrosoftSharePointStreamReader.
@@ -218,7 +246,7 @@ def test_get_file(mock_requests_head, mock_requests_get, mock_get_access_token,
file_extension (str): The file extension to test (e.g., 'txt.gz').
expected_paths (dict): The expected paths and file size in the result.
"""
file_uri = f"https://my_favorite_sharepoint.sharepoint.com/Shared%20Documents/file.{file_extension}"
file_uri = f"{file_uri}.{file_extension}"
mock_file = Mock(download_url=f"https://example.com/file.{file_extension}", uri=file_uri)
mock_logger = Mock()
mock_get_access_token.return_value = "dummy_access_token"