1
0
mirror of synced 2025-12-25 02:09:19 -05:00

connectors-ci: disable dependency scanning (#29033)

This commit is contained in:
Augustin
2023-08-03 20:32:01 +02:00
committed by GitHub
parent 549e36f156
commit bf719b88e9
8 changed files with 109 additions and 58 deletions

View File

@@ -96,7 +96,7 @@ At this point you can run `airbyte-ci` commands from the root of the repository.
#### Options
| Option | Default value | Mapped environment variable | Description |
|-----------------------------------------|---------------------------------|-------------------------------|---------------------------------------------------------------------------------------------|
| --------------------------------------- | ------------------------------- | ----------------------------- | ------------------------------------------------------------------------------------------- |
| `--no-tui` | | | Disables the Dagger terminal UI. |
| `--is-local/--is-ci` | `--is-local` | | Determines the environment in which the CLI runs: local environment or CI environment. |
| `--git-branch` | The checked out git branch name | `CI_GIT_BRANCH` | The git branch on which the pipelines will run. |
@@ -115,16 +115,16 @@ Available commands:
* `airbyte-ci connectors publish`: Publish a connector to Airbyte's DockerHub.
#### Options
| Option | Multiple | Default value | Description |
|------------------------|----------|---------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `--use-remote-secrets` | False | True | If True, connectors configuration will be pulled from Google Secret Manager. Requires the GCP_GSM_CREDENTIALS environment variable to be set with a service account with permission to read GSM secrets. If False the connector configuration will be read from the local connector `secrets` folder. |
| `--name` | True | | Select a specific connector for which the pipeline will run. Can be used multiple time to select multiple connectors. The expected name is the connector technical name. e.g. `source-pokeapi` |
| `--release-stage` | True | | Select connectors with a specific release stage: `alpha`, `beta`, `generally_available`. Can be used multiple times to select multiple release stages. |
| `--language` | True | | Select connectors with a specific language: `python`, `low-code`, `java`. Can be used multiple times to select multiple languages. |
| `--modified` | False | False | Run the pipeline on only the modified connectors on the branch or previous commit (depends on the pipeline implementation). |
| `--concurrency` | False | 5 | Control the number of connector pipelines that can run in parallel. Useful to speed up pipelines or control their resource usage. |
| `--metadata-change-only/--not-metadata-change-only` | False | `--not-metadata-change-only` | Only run the pipeline on connectors with changes on their metadata.yaml file. |
| Option | Multiple | Default value | Description |
| -------------------------------------------------------------- | -------- | -------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `--use-remote-secrets` | False | True | If True, connectors configuration will be pulled from Google Secret Manager. Requires the GCP_GSM_CREDENTIALS environment variable to be set with a service account with permission to read GSM secrets. If False the connector configuration will be read from the local connector `secrets` folder. |
| `--name` | True | | Select a specific connector for which the pipeline will run. Can be used multiple times to select multiple connectors. The expected name is the connector technical name. e.g. `source-pokeapi` |
| `--release-stage` | True | | Select connectors with a specific release stage: `alpha`, `beta`, `generally_available`. Can be used multiple times to select multiple release stages. |
| `--language` | True | | Select connectors with a specific language: `python`, `low-code`, `java`. Can be used multiple times to select multiple languages. |
| `--modified` | False | False | Run the pipeline on only the modified connectors on the branch or previous commit (depends on the pipeline implementation). |
| `--concurrency` | False | 5 | Control the number of connector pipelines that can run in parallel. Useful to speed up pipelines or control their resource usage. |
| `--metadata-change-only/--not-metadata-change-only` | False | `--not-metadata-change-only` | Only run the pipeline on connectors with changes on their metadata.yaml file. |
| `--enable-dependency-scanning/--disable-dependency-scanning` | False | `--disable-dependency-scanning` | When enabled, dependency scanning is performed so that connectors impacted by a change to one of their dependencies are selected as well. |
### <a id="connectors-list-command"></a>`connectors list` command
Retrieve the list of connectors satisfying the provided filters.
@@ -285,7 +285,7 @@ Publish all connectors modified in the head commit: `airbyte-ci connectors --mod
### Options
| Option | Required | Default | Mapped environment variable | Description |
|--------------------------------------|----------|-----------------|------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| ------------------------------------ | -------- | --------------- | ---------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `--pre-release/--main-release` | False | `--pre-release` | | Whether to publish the pre-release or the main release version of a connector. Defaults to pre-release. For main release you have to set the credentials to interact with the GCS bucket. |
| `--docker-hub-username` | True | | `DOCKER_HUB_USERNAME` | Your username to connect to DockerHub. |
| `--docker-hub-password` | True | | `DOCKER_HUB_PASSWORD` | Your password to connect to DockerHub. |
@@ -329,7 +329,7 @@ Validate all `metadata.yaml` files in the repo:
#### Options
| Option | Default | Description |
|--------------------|--------------|----------------------------------------------------------------------------------------------------------------------------|
| ------------------ | ------------ | -------------------------------------------------------------------------------------------------------------------------- |
| `--modified/--all` | `--modified` | Flag to run validation of `metadata.yaml` files on the modified files in the head commit or all the `metadata.yaml` files. |
### <a id="metadata-upload-command"></a>`metadata upload` command
@@ -341,7 +341,7 @@ Upload all the `metadata.yaml` files to a GCS bucket:
#### Options
| Option | Required | Default | Mapped environment variable | Description |
|---------------------|----------|--------------|-----------------------------|--------------------------------------------------------------------------------------------------------------------------|
| ------------------- | -------- | ------------ | --------------------------- | ------------------------------------------------------------------------------------------------------------------------ |
| `--gcs-credentials` | True | | `GCS_CREDENTIALS` | Service account credentials in JSON format with permission to get and upload on the GCS bucket |
| `--modified/--all` | True | `--modified` | | Flag to upload the modified `metadata.yaml` files in the head commit or all the `metadata.yaml` files to a GCS bucket. |
@@ -379,7 +379,8 @@ This command runs the Python tests for a airbyte-ci poetry package.
## Changelog
| Version | PR | Description |
|---------|-----------------------------------------------------------|----------------------------------------------------------------------------------------------|
| ------- | --------------------------------------------------------- | -------------------------------------------------------------------------------------------- |
| 0.4.3 | [#29033](https://github.com/airbytehq/airbyte/pull/29033) | Disable dependency scanning for Java connectors. |
| 0.4.2 | [#29030](https://github.com/airbytehq/airbyte/pull/29030) | Make report path always have the same prefix: `airbyte-ci/`. |
| 0.4.1 | [#28855](https://github.com/airbytehq/airbyte/pull/28855) | Improve the selected connectors detection for connectors commands. |
| 0.4.0 | [#28947](https://github.com/airbytehq/airbyte/pull/28947) | Show Dagger Cloud run URLs in CI |

View File

@@ -57,6 +57,7 @@ def get_selected_connectors_with_modified_files(
modified: bool,
metadata_changes_only: bool,
modified_files: Set[Path],
enable_dependency_scanning: bool = False,
) -> List[ConnectorWithModifiedFiles]:
"""Get the connectors that match the selected criteria.
@@ -67,6 +68,7 @@ def get_selected_connectors_with_modified_files(
modified (bool): Whether to select the modified connectors.
metadata_changes_only (bool): Whether to select only the connectors with metadata changes.
modified_files (Set[Path]): The modified files.
enable_dependency_scanning (bool): Whether to enable the dependency scanning.
Returns:
List[ConnectorWithModifiedFiles]: The connectors that match the selected criteria.
"""
@@ -75,7 +77,9 @@ def get_selected_connectors_with_modified_files(
main_logger.info("--metadata-changes-only overrides --modified")
modified = True
selected_modified_connectors = get_modified_connectors(modified_files) if modified else set()
selected_modified_connectors = (
get_modified_connectors(modified_files, ALL_CONNECTORS, enable_dependency_scanning) if modified else set()
)
selected_connectors_by_name = {c for c in ALL_CONNECTORS if c.technical_name in selected_names}
selected_connectors_by_release_stage = {connector for connector in ALL_CONNECTORS if connector.release_stage in selected_release_stages}
selected_connectors_by_language = {connector for connector in ALL_CONNECTORS if connector.language in selected_languages}
@@ -139,6 +143,12 @@ def get_selected_connectors_with_modified_files(
default=None,
type=int,
)
@click.option(
"--enable-dependency-scanning/--disable-dependency-scanning",
help="When enabled, the dependency scanning will be performed to detect the connectors to test according to a dependency change.",
default=False,
type=bool,
)
@click.pass_context
def connectors(
ctx: click.Context,
@@ -150,6 +160,7 @@ def connectors(
metadata_changes_only: bool,
concurrency: int,
execute_timeout: int,
enable_dependency_scanning: bool,
):
"""Group all the connectors-ci command."""
validate_environment(ctx.obj["is_local"], use_remote_secrets)
@@ -159,7 +170,7 @@ def connectors(
ctx.obj["concurrency"] = concurrency
ctx.obj["execute_timeout"] = execute_timeout
ctx.obj["selected_connectors_with_modified_files"] = get_selected_connectors_with_modified_files(
names, release_stages, languages, modified, metadata_changes_only, ctx.obj["modified_files"]
names, release_stages, languages, modified, metadata_changes_only, ctx.obj["modified_files"], enable_dependency_scanning
)
log_selected_connectors(ctx.obj["selected_connectors_with_modified_files"])

View File

@@ -21,7 +21,7 @@ import anyio
import asyncer
import click
import git
from connector_ops.utils import get_all_connectors_in_repo, get_changed_connectors
from connector_ops.utils import get_changed_connectors
from dagger import Client, Config, Connection, Container, DaggerError, ExecError, File, ImageLayerCompression, QueryError, Secret
from google.cloud import storage
from google.oauth2 import service_account
@@ -40,7 +40,6 @@ METADATA_FILE_NAME = "metadata.yaml"
METADATA_ICON_FILE_NAME = "icon.svg"
DIFF_FILTER = "MADRT" # Modified, Added, Deleted, Renamed, Type changed
IGNORED_FILE_EXTENSIONS = [".md"]
ALL_CONNECTOR_DEPENDENCIES = [(connector, connector.get_local_dependency_paths()) for connector in get_all_connectors_in_repo()]
STATIC_REPORT_PREFIX = "airbyte-ci"
@@ -324,16 +323,19 @@ def _is_ignored_file(file_path: Union[str, Path]) -> bool:
return Path(file_path).suffix in IGNORED_FILE_EXTENSIONS
def _find_modified_connectors(file_path: Union[str, Path], dependency_scanning: bool = True) -> Set[Connector]:
def _find_modified_connectors(
file_path: Union[str, Path], all_connectors: Set[Connector], dependency_scanning: bool = True
) -> Set[Connector]:
"""Find all connectors impacted by the file change."""
modified_connectors = set()
for connector, connector_dependencies in ALL_CONNECTOR_DEPENDENCIES:
for connector in all_connectors:
if Path(file_path).is_relative_to(Path(connector.code_directory)):
main_logger.info(f"Adding connector '{connector}' due to connector file modification: {file_path}.")
modified_connectors.add(connector)
if dependency_scanning:
for connector_dependency in connector_dependencies:
for connector_dependency in connector.get_local_dependency_paths():
if Path(file_path).is_relative_to(Path(connector_dependency)):
# Add the connector to the modified connectors
modified_connectors.add(connector)
@@ -341,10 +343,10 @@ def _find_modified_connectors(file_path: Union[str, Path], dependency_scanning:
return modified_connectors
def get_modified_connectors(modified_files: Set[Path], dependency_scanning: bool = True) -> Set[Connector]:
def get_modified_connectors(modified_files: Set[Path], all_connectors: Set[Connector], dependency_scanning: bool) -> Set[Connector]:
"""Get the set of connectors impacted by the modified files.
As we call connector.get_local_dependencies_paths() any modification to a dependency will trigger connector pipeline for all connectors that depend on it.
The get_local_dependencies_paths function currently computes dependencies for Java connectors only.
If dependency scanning is enabled, any modification to a dependency will trigger the connector pipeline for all connectors that depend on it.
It currently works only for Java connectors.
It's especially useful to trigger tests of the strict-encrypt variant when a change is made to the base connector,
or to test all jdbc connectors when a change is made to source-jdbc or base-java.
We'll consider extending the dependency resolution to Python connectors once we confirm that it's needed and feasible in terms of scale.
@@ -353,7 +355,7 @@ def get_modified_connectors(modified_files: Set[Path], dependency_scanning: bool
modified_connectors = set()
for modified_file in modified_files:
if not _is_ignored_file(modified_file):
modified_connectors.update(_find_modified_connectors(modified_file, dependency_scanning))
modified_connectors.update(_find_modified_connectors(modified_file, all_connectors, dependency_scanning))
return modified_connectors

View File

@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
[tool.poetry]
name = "pipelines"
version = "0.4.2"
version = "0.4.3"
description = "Package maintained by the connector operations team to perform CI for connectors' pipelines"
authors = ["Airbyte <contact@airbyte.io>"]

View File

@@ -1,8 +1,10 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#
import os
import sys
from pathlib import Path
from typing import Set
import dagger
import git
@@ -10,6 +12,7 @@ import pytest
import requests
from connector_ops.utils import Connector
from pipelines import utils
from tests.utils import ALL_CONNECTORS
@pytest.fixture(scope="session")
@@ -49,3 +52,20 @@ def new_connector(airbyte_repo_path: Path, mocker) -> Connector:
yield Connector("source-new-connector")
new_connector_code_directory.joinpath("metadata.yaml").unlink()
new_connector_code_directory.rmdir()
@pytest.fixture(autouse=True, scope="session")
def from_airbyte_root(airbyte_repo_path):
    """Run the test session from the root of the Airbyte repo.

    The fixture is declared with autouse=True and scope="session": the working directory is
    switched to `airbyte_repo_path` before yielding, and the working directory that was active
    beforehand is restored once the fixture is torn down.
    """
    previous_working_directory = Path.cwd()
    os.chdir(airbyte_repo_path)
    yield airbyte_repo_path
    # Teardown: go back to where the session was started from.
    os.chdir(previous_working_directory)
@pytest.fixture(scope="session")
def all_connectors() -> Set[Connector]:
    """Expose the ALL_CONNECTORS collection imported from tests.utils as a session-scoped fixture."""
    return ALL_CONNECTORS

View File

@@ -1,24 +1,14 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#
import os
import random
from pathlib import Path
from typing import Callable
import pytest
from click.testing import CliRunner
from connector_ops.utils import METADATA_FILE_NAME, Connector, ConnectorLanguage, get_all_connectors_in_repo
from connector_ops.utils import METADATA_FILE_NAME, ConnectorLanguage
from pipelines.bases import ConnectorWithModifiedFiles
from pipelines.commands.groups import connectors
@pytest.fixture(autouse=True, scope="module")
def from_airbyte_root(airbyte_repo_path):
    """Run the tests of this module from the root of the Airbyte repo.

    The working directory is switched to `airbyte_repo_path` before yielding and the previous
    working directory is restored when the fixture is torn down.
    """
    previous_working_directory = Path.cwd()
    os.chdir(airbyte_repo_path)
    yield airbyte_repo_path
    # Teardown: go back to where the module's tests were started from.
    os.chdir(previous_working_directory)
from tests.utils import pick_a_random_connector
@pytest.fixture(scope="session")
@@ -26,25 +16,6 @@ def runner():
return CliRunner()
ALL_CONNECTORS = get_all_connectors_in_repo()
def pick_a_random_connector(
    language: ConnectorLanguage = None, release_stage: str = None, other_picked_connectors: list = None
) -> Connector:
    """Pick a random connector from the list of all connectors.

    Args:
        language (ConnectorLanguage): When set, only connectors whose language is this one are considered.
        release_stage (str): When set, only connectors with this release stage are considered.
        other_picked_connectors (list): Connectors that must not be picked again.

    Returns:
        Connector: A connector matching the filters that is not in other_picked_connectors.

    Raises:
        IndexError: If no connector is left once the filters and the exclusion list are applied.
    """
    excluded_connectors = other_picked_connectors or []
    # Exclude the already picked connectors up front instead of re-drawing until an unpicked one
    # comes out: the re-draw loop never terminates when every matching connector was already picked.
    candidates = [
        connector
        for connector in ALL_CONNECTORS
        if (not language or connector.language is language)
        and (not release_stage or connector.release_stage == release_stage)
        and connector not in excluded_connectors
    ]
    if not candidates:
        # Same exception type random.choice raises on an empty sequence, with an actionable message.
        raise IndexError("No connector matches the requested language, release stage and exclusion list.")
    return random.choice(candidates)
def test_get_selected_connectors_by_name_no_file_modification():
connector = pick_a_random_connector()
selected_connectors = connectors.get_selected_connectors_with_modified_files(

View File

@@ -1,10 +1,13 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#
from pathlib import Path
from unittest import mock
import pytest
from connector_ops.utils import ConnectorLanguage
from pipelines import utils
from tests.utils import pick_a_random_connector
@pytest.mark.parametrize(
@@ -118,3 +121,22 @@ from pipelines import utils
)
def test_render_report_output_prefix(ctx, expected):
assert utils.DaggerPipelineCommand.render_report_output_prefix(ctx) == expected
@pytest.mark.parametrize("enable_dependency_scanning", [True, False])
def test_get_modified_connectors_with_dependency_scanning(all_connectors, enable_dependency_scanning):
    """Check how a base-java change is propagated depending on the dependency scanning flag.

    Two distinct random Java connectors are picked. One gets a modified file inside its own code
    directory and must always be selected. The other one has no modified file of its own and is
    expected to be selected only when dependency scanning is enabled.
    """
    untouched_java_connector = pick_a_random_connector(language=ConnectorLanguage.JAVA)
    touched_java_connector = pick_a_random_connector(
        language=ConnectorLanguage.JAVA, other_picked_connectors=[untouched_java_connector]
    )
    modified_files = [
        Path("airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/BaseConnector.java"),
        touched_java_connector.code_directory / "foo.bar",
    ]

    modified_connectors = utils.get_modified_connectors(modified_files, all_connectors, enable_dependency_scanning)

    # The connector without any file change of its own can only be reached through the base-java change.
    if not enable_dependency_scanning:
        assert untouched_java_connector not in modified_connectors
    else:
        assert untouched_java_connector in modified_connectors
    # A file changed in the connector's own code directory selects it whatever the flag value.
    assert touched_java_connector in modified_connectors

View File

@@ -0,0 +1,24 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#
import random
from connector_ops.utils import Connector, ConnectorLanguage, get_all_connectors_in_repo
ALL_CONNECTORS = get_all_connectors_in_repo()
def pick_a_random_connector(
    language: ConnectorLanguage = None, release_stage: str = None, other_picked_connectors: list = None
) -> Connector:
    """Pick a random connector from the list of all connectors.

    Args:
        language (ConnectorLanguage): When set, only connectors whose language is this one are considered.
        release_stage (str): When set, only connectors with this release stage are considered.
        other_picked_connectors (list): Connectors that must not be picked again.

    Returns:
        Connector: A connector matching the filters that is not in other_picked_connectors.

    Raises:
        IndexError: If no connector is left once the filters and the exclusion list are applied.
    """
    excluded_connectors = other_picked_connectors or []
    # Exclude the already picked connectors up front instead of re-drawing until an unpicked one
    # comes out: the re-draw loop never terminates when every matching connector was already picked.
    candidates = [
        connector
        for connector in ALL_CONNECTORS
        if (not language or connector.language is language)
        and (not release_stage or connector.release_stage == release_stage)
        and connector not in excluded_connectors
    ]
    if not candidates:
        # Same exception type random.choice raises on an empty sequence, with an actionable message.
        raise IndexError("No connector matches the requested language, release stage and exclusion list.")
    return random.choice(candidates)