From 810b802fdac69fb0dc72b3e39ea0dd279b41bbec Mon Sep 17 00:00:00 2001 From: Augustin Date: Tue, 25 Jun 2024 02:34:18 +0200 Subject: [PATCH] connector-insights: add code tracker (#40152) --- .github/workflows/connectors_insights.yml | 6 +- .../connectors/connectors_insights/README.md | 4 + .../connectors_insights/pyproject.toml | 2 +- .../src/connectors_insights/insights.py | 41 +++++++++- .../src/connectors_insights/models.py | 3 + .../src/connectors_insights/pylint.py | 55 +++++++++++++ .../cdk_deprecation_checkers.py | 77 +++++++++++++++++++ .../src/connectors_insights/utils.py | 31 +++++++- 8 files changed, 211 insertions(+), 8 deletions(-) create mode 100644 airbyte-ci/connectors/connectors_insights/src/connectors_insights/pylint.py create mode 100644 airbyte-ci/connectors/connectors_insights/src/connectors_insights/pylint_plugins/cdk_deprecation_checkers.py diff --git a/.github/workflows/connectors_insights.yml b/.github/workflows/connectors_insights.yml index 02c14844201..53f2389ba16 100644 --- a/.github/workflows/connectors_insights.yml +++ b/.github/workflows/connectors_insights.yml @@ -4,7 +4,9 @@ on: schedule: - cron: "0 0,12 * * *" # Run every 12 hours UTC workflow_dispatch: - + inputs: + rewrite: + default: false jobs: connectors_insights: name: Connectors Insights generation @@ -46,4 +48,4 @@ jobs: run: echo "GOOGLE_APPLICATION_CREDENTIALS=$HOME/gcp-sa-key.json" >> $GITHUB_ENV - name: Run connectors insights run: | - poetry -C airbyte-ci/connectors/connectors_insights run connectors-insights generate --gcs-uri=gs://prod-airbyte-cloud-connector-metadata-service/connector_insights --connector-directory airbyte-integrations/connectors/ --concurrency 10 + poetry -C airbyte-ci/connectors/connectors_insights run connectors-insights generate --gcs-uri=gs://prod-airbyte-cloud-connector-metadata-service/connector_insights --connector-directory airbyte-integrations/connectors/ --concurrency 10 {{ inputs.rewrite == true && '--rewrite' || ''}} diff --git a/airbyte-ci/connectors/connectors_insights/README.md b/airbyte-ci/connectors/connectors_insights/README.md index ddb1edb9ecb..8ed18bc2a32 100644 --- a/airbyte-ci/connectors/connectors_insights/README.md +++ b/airbyte-ci/connectors/connectors_insights/README.md @@ -56,5 +56,9 @@ This CLI is currently running nightly in GitHub Actions. The workflow can be fou ## Changelog +### 0.2.0 +- Detect deprecated class and module use in connectors. +- Fix missing CDK version for connectors not declaring a CDK name in their metadata. + ### 0.1.0 - Initial release diff --git a/airbyte-ci/connectors/connectors_insights/pyproject.toml b/airbyte-ci/connectors/connectors_insights/pyproject.toml index b359575a7b0..e0c7c8a4647 100644 --- a/airbyte-ci/connectors/connectors_insights/pyproject.toml +++ b/airbyte-ci/connectors/connectors_insights/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "connectors-insights" -version = "0.1.0" +version = "0.2.0" description = "" authors = ["Airbyte "] readme = "README.md" diff --git a/airbyte-ci/connectors/connectors_insights/src/connectors_insights/insights.py b/airbyte-ci/connectors/connectors_insights/src/connectors_insights/insights.py index b28c7820c81..acc238b72a9 100644 --- a/airbyte-ci/connectors/connectors_insights/src/connectors_insights/insights.py +++ b/airbyte-ci/connectors/connectors_insights/src/connectors_insights/insights.py @@ -6,10 +6,12 @@ import datetime import itertools import json import logging +import re from typing import TYPE_CHECKING from connectors_insights.hacks import get_ci_on_master_report from connectors_insights.models import ConnectorInsights +from connectors_insights.pylint import get_pylint_output from connectors_insights.result_backends import FileToPersist, ResultBackend from connectors_insights.sbom import get_json_sbom @@ -65,11 +67,40 @@ def get_sbom_inferred_insights(raw_sbom: str | None, connector: Connector) -> Di dependency = {"type": artifact["type"], "version": artifact["version"], "package_name": artifact["name"]} if isinstance(sbom_inferred_insights["dependencies"], list) and dependency not in sbom_inferred_insights["dependencies"]: sbom_inferred_insights["dependencies"].append(dependency) - if connector.cdk_name == "python" or connector.cdk_name == "low-code": - sbom_inferred_insights["cdk_version"] = python_artifacts.get("airbyte-cdk", {}).get("version") + sbom_inferred_insights["cdk_version"] = python_artifacts.get("airbyte-cdk", {}).get("version") return sbom_inferred_insights +def get_pylint_inferred_insights(pylint_output: str | None) -> Dict: + """Make insights from the pylint output. + It currently parses the deprecated classes and modules from the pylint output. + """ + if not pylint_output: + return {} + + deprecated_classes_in_use = [] + deprecated_modules_in_use = [] + forbidden_method_names_in_use = [] + deprecated_class_pattern = r"Using deprecated class (\w+) of module ([\w\.]+)" + deprecated_module_pattern = r"Deprecated module '([^']+)'" + forbidden_method_name_pattern = r'Method name "([^"]+)"' + for message in json.loads(pylint_output): + if message["symbol"] == "deprecated-class": + if match := re.search(deprecated_class_pattern, message["message"]): + deprecated_classes_in_use.append(f"{match.group(2)}.{match.group(1)}") + if message["symbol"] == "deprecated-module": + if match := re.search(deprecated_module_pattern, message["message"]): + deprecated_modules_in_use.append(match.group(1)) + if message["symbol"] == "forbidden-method-name": + if match := re.search(forbidden_method_name_pattern, message["message"]): + forbidden_method_names_in_use.append(match.group(1)) + return { + "deprecated_classes_in_use": deprecated_classes_in_use, + "deprecated_modules_in_use": deprecated_modules_in_use, + "forbidden_method_names_in_use": forbidden_method_names_in_use, + } + + def should_skip_generation( result_backends: List[ResultBackend] | None, connector: Connector, files_to_persist: List[FileToPersist], rewrite: bool ) -> bool: @@ -110,7 +141,7 @@ async def fetch_sbom(dagger_client: dagger.Client, connector: Connector) -> str return None -def generate_insights(connector: Connector, sbom: str | None) -> ConnectorInsights: +def generate_insights(connector: Connector, sbom: str | None, pylint_output: str | None) -> ConnectorInsights: """Generate insights for the connector. Args: @@ -124,6 +155,7 @@ def generate_insights(connector: Connector, sbom: str | None) -> ConnectorInsigh return ConnectorInsights( **{ **get_metadata_inferred_insights(connector), + **get_pylint_inferred_insights(pylint_output), **get_sbom_inferred_insights(sbom, connector), "ci_on_master_report": ci_on_master_report, "ci_on_master_passes": ci_on_master_report.get("success") if ci_on_master_report else None, @@ -195,11 +227,12 @@ async def generate_insights_for_connector( logger.info(f"Generating insights for {connector.technical_name}") result_backends = result_backends or [] + pylint_output = await get_pylint_output(dagger_client, connector) raw_sbom = await fetch_sbom(dagger_client, connector) if raw_sbom: sbom_file.set_file_content(raw_sbom) - insights = generate_insights(connector, raw_sbom) + insights = generate_insights(connector, raw_sbom, pylint_output) insights_file.set_file_content(insights.json()) persist_files(connector, files_to_persist, result_backends, rewrite, logger) logger.info(f"Finished generating insights for {connector.technical_name}") diff --git a/airbyte-ci/connectors/connectors_insights/src/connectors_insights/models.py b/airbyte-ci/connectors/connectors_insights/src/connectors_insights/models.py index 99de4a333ab..9c8a22b05cf 100644 --- a/airbyte-ci/connectors/connectors_insights/src/connectors_insights/models.py +++ b/airbyte-ci/connectors/connectors_insights/src/connectors_insights/models.py @@ -31,6 +31,9 @@ class ConnectorInsights(BaseModel): dependencies: list[dict] is_cloud_enabled: StrictBool is_oss_enabled: StrictBool + deprecated_classes_in_use: list[str] | None + deprecated_modules_in_use: list[str] | None + forbidden_method_names_in_use: list[str] | None class Config: json_encoders = {dict: lambda v: json.dumps(v, default=pydantic_encoder)} diff --git a/airbyte-ci/connectors/connectors_insights/src/connectors_insights/pylint.py b/airbyte-ci/connectors/connectors_insights/src/connectors_insights/pylint.py new file mode 100644 index 00000000000..ac795cfe93f --- /dev/null +++ b/airbyte-ci/connectors/connectors_insights/src/connectors_insights/pylint.py @@ -0,0 +1,55 @@ +# Copyright (c) 2024 Airbyte, Inc., all rights reserved. + +from __future__ import annotations + +import os +from pathlib import Path +from typing import TYPE_CHECKING + +from connector_ops.utils import ConnectorLanguage # type: ignore +from connectors_insights.utils import never_fail_exec + +if TYPE_CHECKING: + + import dagger + from connector_ops.utils import Connector # type: ignore + +PYLINT_COMMAND = [ + "pylint", + "--load-plugins=custom_plugin", + "--disable=all", + "--output-format=json", + "--enable=deprecated-class", + "--enable=deprecated-module", + "--enable=forbidden-method-name", + ".", +] + + +async def get_pylint_output(dagger_client: dagger.Client, connector: Connector) -> str | None: + """Invoke pylint to check for deprecated classes and modules in the connector code. + We use the custom plugin cdk_deprecation_checkers.py to check for deprecated classes and modules. + The plugin is located in the `pylint_plugins` directory. + + Args: + dagger_client (dagger.Client): Current dagger client. + connector (Connector): Connector object. + + Returns: + str | None: Pylint output. + """ + if connector.language not in [ConnectorLanguage.PYTHON, ConnectorLanguage.LOW_CODE]: + return None + cdk_deprecation_checker_path = Path(os.path.abspath(__file__)).parent / "pylint_plugins/cdk_deprecation_checkers.py" + + return await ( + dagger_client.container() + .from_(connector.image_address) + .with_new_file("__init__.py", contents="") + .with_exec(["pip", "install", "pylint"], skip_entrypoint=True) + .with_workdir(connector.technical_name.replace("-", "_")) + .with_env_variable("PYTHONPATH", ".") + .with_new_file("custom_plugin.py", contents=cdk_deprecation_checker_path.read_text()) + .with_(never_fail_exec(PYLINT_COMMAND)) + .stdout() + ) diff --git a/airbyte-ci/connectors/connectors_insights/src/connectors_insights/pylint_plugins/cdk_deprecation_checkers.py b/airbyte-ci/connectors/connectors_insights/src/connectors_insights/pylint_plugins/cdk_deprecation_checkers.py new file mode 100644 index 00000000000..a961e1a3e22 --- /dev/null +++ b/airbyte-ci/connectors/connectors_insights/src/connectors_insights/pylint_plugins/cdk_deprecation_checkers.py @@ -0,0 +1,77 @@ +# Copyright (c) 2024 Airbyte, Inc., all rights reserved. + +from __future__ import annotations + +from collections import namedtuple +from typing import TYPE_CHECKING + +from pylint.checkers import BaseChecker, DeprecatedMixin # type: ignore + +if TYPE_CHECKING: + from typing import Set + + import astroid # type: ignore + from pylint.lint import PyLinter # type: ignore + +DeprecatedClass = namedtuple("DeprecatedClass", ["module", "name"]) + +DEPRECATED_CLASSES: Set[DeprecatedClass] = { + DeprecatedClass("airbyte_cdk.logger", "AirbyteLogger"), + DeprecatedClass(None, "GoogleAnalyticsDataApiBaseStream"), +} + +DEPRECATED_MODULES = {"airbyte_cdk.sources.streams.http.auth"} + +FORBIDDEN_METHOD_NAMES = {"get_updated_state"} + + +class ForbiddenMethodNameChecker(BaseChecker): + name = "forbidden-method-name-checker" + msgs = { + "C9001": ('Method name "%s" is forbidden', "forbidden-method-name", "Used when a forbidden method name is detected."), + } + + def visit_functiondef(self, node: astroid.node) -> None: + if node.name in FORBIDDEN_METHOD_NAMES: + self.add_message("forbidden-method-name", node=node, args=(node.name,)) + + +class DeprecationChecker(DeprecatedMixin, BaseChecker): + """Check for deprecated classes and modules. + The DeprecatedMixin class is here: + https://github.com/pylint-dev/pylint/blob/a5a77f6e891f6e143439d19b5e7f0a29eb5ea1cd/pylint/checkers/deprecated.py#L31 + """ + + name = "deprecated" + + msgs = { + **DeprecatedMixin.DEPRECATED_METHOD_MESSAGE, + **DeprecatedMixin.DEPRECATED_ARGUMENT_MESSAGE, + **DeprecatedMixin.DEPRECATED_CLASS_MESSAGE, + **DeprecatedMixin.DEPRECATED_MODULE_MESSAGE, + } + + def deprecated_modules(self) -> set[str]: + """Callback method called by DeprecatedMixin for every module found in the code. + + Returns: + collections.abc.Container of deprecated module names. + """ + return DEPRECATED_MODULES + + def deprecated_classes(self, module: str) -> set[str]: + """Callback method called by DeprecatedMixin for every class found in the code. + + Returns: + collections.abc.Container of deprecated class names. + """ + _deprecated_classes = set() + for deprecated_class in DEPRECATED_CLASSES: + if deprecated_class.module is None or deprecated_class.module == module: + _deprecated_classes.add(deprecated_class.name) + return _deprecated_classes + + +def register(linter: PyLinter) -> None: + linter.register_checker(DeprecationChecker(linter)) + linter.register_checker(ForbiddenMethodNameChecker(linter)) diff --git a/airbyte-ci/connectors/connectors_insights/src/connectors_insights/utils.py b/airbyte-ci/connectors/connectors_insights/src/connectors_insights/utils.py index f51e85ebe7a..7ab30ef9a2d 100644 --- a/airbyte-ci/connectors/connectors_insights/src/connectors_insights/utils.py +++ b/airbyte-ci/connectors/connectors_insights/src/connectors_insights/utils.py @@ -4,7 +4,11 @@ from __future__ import annotations import re from pathlib import Path -from typing import Set, Tuple +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from typing import Set, Tuple, List, Callable + from dagger import Container from connector_ops.utils import METADATA_FILE_NAME, Connector # type: ignore @@ -54,3 +58,28 @@ def gcs_uri_to_bucket_key(gcs_uri: str) -> Tuple[str, str]: bucket, key = match.groups() return bucket, key + + +def never_fail_exec(command: List[str]) -> Callable[[Container], Container]: + """ + Wrap a command execution with some bash sugar to always exit with a 0 exit code but write the actual exit code to a file. + + Underlying issue: + When a classic dagger with_exec is returning a >0 exit code an ExecError is raised. + It's OK for the majority of our container interaction. + But some execution, like running CAT, are expected to often fail. + In CAT we don't want ExecError to be raised on container interaction because CAT might write updated secrets that we need to pull from the container after the test run. + The bash trick below is a hack to always return a 0 exit code but write the actual exit code to a file. + The file is then read by the pipeline to determine the exit code of the container. + + Args: + command (List[str]): The command to run in the container. + + Returns: + Callable: _description_ + """ + + def never_fail_exec_inner(container: Container) -> Container: + return container.with_exec(["sh", "-c", f"{' '.join(command)}; echo $? > /exit_code"], skip_entrypoint=True) + + return never_fail_exec_inner