1
0
mirror of synced 2025-12-19 18:14:56 -05:00

airbyte-ci format: exclude .gitignored files from format (#33249)

Co-authored-by: Marius Posta <marius@airbyte.io>
This commit is contained in:
Augustin
2023-12-13 10:33:37 +01:00
committed by GitHub
parent ce7554021f
commit 098a285b2f
8 changed files with 53 additions and 33 deletions

View File

@@ -152,6 +152,7 @@ At this point you can run `airbyte-ci` commands.
| `--pipeline-start-timestamp` | Current epoch time | `CI_PIPELINE_START_TIMESTAMP` | Start time of the pipeline as epoch time. Used for pipeline run duration computation. |
| `--show-dagger-logs/--hide-dagger-logs` | `--hide-dagger-logs` | | Flag to show or hide the dagger logs. |
### <a id="connectors-command-subgroup"></a>`connectors` command subgroup
Available commands:
@@ -501,6 +502,7 @@ This command runs the Python tests for an airbyte-ci poetry package.
| Version | PR | Description |
| ------- | ---------------------------------------------------------- | --------------------------------------------------------------------------------------------------------- |
| 2.10.8 | [#33249](https://github.com/airbytehq/airbyte/pull/33249) | Exclude git ignored files from formatting. |
| 2.10.7 | [#33248](https://github.com/airbytehq/airbyte/pull/33248) | Fix bug which broke airbyte-ci connectors tests when optional DockerHub credentials env vars are not set. |
| 2.10.6 | [#33170](https://github.com/airbytehq/airbyte/pull/33170) | Remove Dagger logs from console output of `format`. |
| 2.10.5 | [#33097](https://github.com/airbytehq/airbyte/pull/33097) | Improve `format` performance, exit with 1 status code when `fix` changes files. |

View File

@@ -8,47 +8,36 @@ CACHE_MOUNT_PATH = "/cache"
LICENSE_FILE_NAME = "LICENSE_SHORT"
# TODO create .airbyte_ci_ignore files?
# Glob patterns excluded from formatting. Every pattern appears exactly once.
DEFAULT_FORMAT_IGNORE_LIST = [
    "**/__init__.py",  # These files have never been formatted and we don't want to start now (for now) see https://github.com/airbytehq/airbyte/issues/33296
    # Caches, virtual environments, VCS metadata and build artifacts.
    "**/__pycache__",
    "**/.eggs",
    "**/.git",
    "**/.gradle",
    "**/.mypy_cache",
    "**/.pytest_cache",
    "**/.tox",
    "**/.venv",
    "**/venv",
    "**/*.egg-info",
    "**/build",
    "**/node_modules",
    "**/airbyte-ci/connectors/metadata_service/lib/tests/fixtures/**/invalid",  # These are deliberately invalid and unformattable.
    "**/charts",  # Helm charts often have injected template strings that will fail general linting. Helm linting is done separately.
    # dbt / normalization templates and test outputs.
    "**/dbt_test_config",
    "**/dbt-project-template",
    "**/dbt-project-template-clickhouse",
    "**/dbt-project-template-duckdb",
    "**/dbt-project-template-mssql",
    "**/dbt-project-template-mysql",
    "**/dbt-project-template-oracle",
    "**/dbt-project-template-snowflake",
    "**/dbt-project-template-tidb",
    "**/normalization_test_output",
    # '**/tools',
    "**/secrets",
    "**/resources/seed/*_catalog.json",  # Do not remove - this is also necessary to prevent diffs in our github workflows
    "**/resources/seed/*_registry.json",  # Do not remove - this is also necessary to prevent diffs in our github workflows
    "**/resources/seed/specs_secrets_mask.yaml",  # Downloaded externally.
    "**/resources/examples/airflow/superset/docker/pythonpath_dev/superset_config.py",
    "**/source-amplitude/unit_tests/api_data/zipped.json",  # Zipped file presents as non-UTF-8 making spotless sad
    "**/airbyte-connector-builder-server/connector_builder/generated",  # autogenerated code doesn't need to be formatted
    "**/declarative_component_schema.py",
    "**/source-stock-ticker-api-tutorial/source.py",
    "**/tools/git_hooks/tests/test_spec_linter.py",
    "**/tools/schema_generator/schema_generator/infer_schemas.py",
    "airbyte-cdk/python/airbyte_cdk/sources/declarative/models/**",  # These files are generated and should not be formatted
    "airbyte-ci/connectors/pipelines/tests/test_format/non_formatted_code",  # This is a test directory with badly formatted code
    "airbyte-ci/connectors/pipelines/pipeline_reports",  # This is a directory with generated reports that should not be formatted
]

View File

@@ -15,6 +15,7 @@ from pipelines import main_logger
from pipelines.airbyte_ci.format.actions import list_files_in_directory
from pipelines.airbyte_ci.format.configuration import Formatter
from pipelines.airbyte_ci.format.consts import DEFAULT_FORMAT_IGNORE_LIST, REPO_MOUNT_PATH, WARM_UP_INCLUSIONS
from pipelines.consts import GIT_IMAGE
from pipelines.helpers import sentry_utils
from pipelines.helpers.cli import LogOptions, log_command_results
from pipelines.helpers.utils import sh_dash_c
@@ -75,16 +76,38 @@ class FormatCommand(click.Command):
message = f"{message}."
return message
def get_dir_to_format(self, dagger_client) -> Directory:
    """Get a directory with all the source code to format according to the file_filter.

    We mount the files to format in a git container and remove all gitignored files.
    It ensures we're not formatting files that are gitignored.

    Args:
        dagger_client (dagger.Client): The dagger client to use to get the directory.

    Returns:
        Directory: The directory with the files to format that are not gitignored.
    """
    # Load a directory from the host with all the files to format according to the file_filter and the .gitignore files
    dir_to_format = dagger_client.host().directory(
        self.LOCAL_REPO_PATH, include=self.file_filter + [".gitignore"], exclude=DEFAULT_FORMAT_IGNORE_LIST
    )

    return (
        dagger_client.container()
        .from_(GIT_IMAGE)
        .with_workdir(REPO_MOUNT_PATH)
        .with_mounted_directory(REPO_MOUNT_PATH, dir_to_format)
        # All with_exec commands below will re-run if the to_format directory changes
        # NOTE(review): the bare ["init"] / ["clean", ...] args presumably rely on the image entrypoint being `git` - confirm.
        .with_exec(["init"])
        # Remove all gitignored files
        .with_exec(["clean", "-dfqX"])
        # Delete all .gitignore files
        # Raw string: `\;` must reach `find` verbatim and is not a valid Python escape sequence.
        .with_exec(sh_dash_c([r'find . -type f -name ".gitignore" -exec rm {} \;']), skip_entrypoint=True)
        # Delete .git
        .with_exec(["rm", "-rf", ".git"], skip_entrypoint=True)
        .directory(REPO_MOUNT_PATH)
        .with_timestamps(0)
    )
@pass_pipeline_context
@sentry_utils.with_command_context
@@ -109,6 +132,7 @@ class FormatCommand(click.Command):
pipeline_name=f"Format {self.formatter.value}", log_output=dagger_logs
)
dir_to_format = self.get_dir_to_format(dagger_client)
container = self.get_format_container_fn(dagger_client, dir_to_format)
command_result = await self.get_format_command_result(dagger_client, container, dir_to_format)

View File

@@ -41,6 +41,7 @@ DOCKER_CLI_IMAGE = f"docker:{DOCKER_VERSION}-cli"
DOCKER_REGISTRY_MIRROR_URL = os.getenv("DOCKER_REGISTRY_MIRROR_URL")
DOCKER_REGISTRY_ADDRESS = "docker.io"
DOCKER_VAR_LIB_VOLUME_NAME = "docker-cache"
GIT_IMAGE = "alpine/git:latest"
GRADLE_CACHE_PATH = "/root/.gradle/caches"
GRADLE_BUILD_CACHE_PATH = f"{GRADLE_CACHE_PATH}/build-cache-1"
GRADLE_READ_ONLY_DEPENDENCY_CACHE_PATH = "/root/gradle_dependency_cache"

View File

@@ -0,0 +1 @@
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.

View File

@@ -3,7 +3,10 @@
#
import functools
from typing import Set
import os
import re
from pathlib import Path
from typing import List, Set
import git
from dagger import Connection

View File

@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
[tool.poetry]
name = "pipelines"
version = "2.10.7"
version = "2.10.8"
description = "Packaged maintained by the connector operations team to perform CI for connectors' pipelines"
authors = ["Airbyte <contact@airbyte.io>"]

View File

@@ -26,10 +26,10 @@ import json
import os
import sys
import genson.schema.strategies as strategies
from airbyte_cdk.models import AirbyteMessage, Type
from genson import SchemaBuilder
from genson.schema.strategies.object import Object
import genson.schema.strategies as strategies
class NoRequiredObj(Object):