1
0
Mirror of an upstream repository, synced 2025-12-19 18:14:56 -05:00

gradle: split off python cdk (#35306)

This commit is contained in:
Marius Posta
2024-02-16 04:39:14 -08:00
committed by GitHub
parent b741045f50
commit dc088bc3a9
18 changed files with 551 additions and 554 deletions

View File

@@ -73,7 +73,7 @@ jobs:
repository: ${{ github.event.inputs.repo }}
ref: ${{ github.event.inputs.gitref }}
- name: Build CDK Package
run: ./gradlew --no-daemon --no-build-cache :airbyte-cdk:python:build
run: (cd airbyte-cdk/python; ./gradlew --no-daemon --no-build-cache :build)
- name: Post failure to Slack channel dev-connectors-extensibility
if: ${{ failure() }}
uses: slackapi/slack-github-action@v1.23.0

View File

@@ -0,0 +1,11 @@
#!/usr/bin/env bash
# Builds the airbyte/code-generator:dev docker image from the sibling
# code-generator Dockerfile. DOCKER_BUILD_ARCH selects the target
# architecture and defaults to amd64.
set -e
DOCKER_BUILD_ARCH="${DOCKER_BUILD_ARCH:-amd64}"
# https://docs.docker.com/develop/develop-images/build_enhancements/
export DOCKER_BUILDKIT=1
# Quote all path expansions: $0 (and thus the derived path) may contain
# spaces or glob characters, which would break word-splitting here.
CODE_GENERATOR_DOCKERFILE="$(dirname "$0")/../code-generator/Dockerfile"
test -f "$CODE_GENERATOR_DOCKERFILE"
docker build --build-arg DOCKER_BUILD_ARCH="$DOCKER_BUILD_ARCH" -t "airbyte/code-generator:dev" - < "$CODE_GENERATOR_DOCKERFILE"

View File

@@ -1,25 +1,134 @@
import ru.vyarus.gradle.plugin.python.task.PythonTask
plugins {
id 'airbyte-python'
id 'airbyte-docker-legacy'
id 'base'
id 'ru.vyarus.use-python' version '2.3.0'
}
def generateCodeGeneratorImage = tasks.register('generateCodeGeneratorImage', Exec) {
commandLine 'bin/build_code_generator_image.sh'
}
def generateComponentManifestClassFiles = tasks.register('generateComponentManifestClassFiles', Exec) {
environment 'ROOT_DIR', rootDir.absolutePath
environment 'ROOT_DIR', rootDir.parentFile.parentFile.absolutePath
commandLine 'bin/generate-component-manifest-files.sh'
}
generateComponentManifestClassFiles.configure {
dependsOn project(':tools:code-generator').tasks.named('assemble')
dependsOn generateCodeGeneratorImage
}
tasks.register('generate').configure {
dependsOn generateComponentManifestClassFiles
}
tasks.register('validateSourceYamlManifest', Exec) {
environment 'ROOT_DIR', rootDir.absolutePath
environment 'ROOT_DIR', rootDir.parentFile.parentFile.absolutePath
commandLine 'bin/validate-yaml-schema.sh'
}
tasks.register('runLowCodeConnectorUnitTests', Exec) {
environment 'ROOT_DIR', rootDir.absolutePath
environment 'ROOT_DIR', rootDir.parentFile.parentFile.absolutePath
commandLine 'bin/low-code-unit-tests.sh'
}
def venvDirectoryName = '.venv'
// Add a task that allows cleaning up venvs to every python project
def cleanPythonVenv = tasks.register('cleanPythonVenv', Exec) {
commandLine 'rm'
args '-rf', "${projectDir.absolutePath}/${venvDirectoryName}"
}
tasks.named('clean').configure {
dependsOn cleanPythonVenv
}
// Configure gradle python plugin.
python {
envPath = venvDirectoryName
minPythonVersion '3.10'
// Amazon Linux support.
// The airbyte-ci tool runs gradle tasks in AL2023-based containers.
// In AL2023, `python3` is necessarily v3.9, and later pythons need to be installed and named explicitly.
// See https://github.com/amazonlinux/amazon-linux-2023/issues/459 for details.
try {
if ("python3.11 --version".execute().waitFor() == 0) {
// python3.11 definitely exists at this point, use it instead of 'python3'.
pythonBinary "python3.11"
}
} catch (IOException _) {
// Swallow exception if python3.11 is not installed.
}
// Pyenv support.
try {
def pyenvRoot = "pyenv root".execute()
def pyenvLatest = "pyenv latest ${minPythonVersion}".execute()
// Pyenv definitely exists at this point: use 'python' instead of 'python3' in all cases.
pythonBinary "python"
if (pyenvRoot.waitFor() == 0 && pyenvLatest.waitFor() == 0) {
pythonPath "${pyenvRoot.text.trim()}/versions/${pyenvLatest.text.trim()}/bin"
}
} catch (IOException _) {
// Swallow exception if pyenv is not installed.
}
scope 'VIRTUALENV'
installVirtualenv = true
pip 'pip:23.2.1'
pip 'mccabe:0.6.1'
// https://github.com/csachs/pyproject-flake8/issues/13
pip 'flake8:4.0.1'
// flake8 doesn't support pyproject.toml files
// and thus there is the wrapper "pyproject-flake8" for this
pip 'pyproject-flake8:0.0.1a2'
pip 'pytest:6.2.5'
pip 'coverage[toml]:6.3.1'
}
def installLocalReqs = tasks.register('installLocalReqs', PythonTask) {
module = "pip"
command = "install .[dev,tests]"
inputs.file('setup.py')
outputs.file('build/installedlocalreqs.txt')
}
def flakeCheck = tasks.register('flakeCheck', PythonTask) {
module = "pflake8"
command = "--config pyproject.toml ./"
}
def installReqs = tasks.register('installReqs', PythonTask) {
module = "pip"
command = "install .[main]"
inputs.file('setup.py')
outputs.file('build/installedreqs.txt')
}
installReqs.configure {
dependsOn installLocalReqs
}
tasks.named('check').configure {
dependsOn installReqs
dependsOn flakeCheck
}
def installTestReqs = tasks.register('installTestReqs', PythonTask) {
module = "pip"
command = "install .[tests]"
inputs.file('setup.py')
outputs.file('build/installedtestreqs.txt')
}
installTestReqs.configure {
dependsOn installReqs
}
def testTask = tasks.register('testPython', PythonTask) {
module = "coverage"
command = "run --data-file=unit_tests/.coverage.testPython --rcfile=pyproject.toml -m pytest -s unit_tests -c pytest.ini"
}
testTask.configure {
dependsOn installTestReqs
}
tasks.named('check').configure {
dependsOn testTask
}

View File

@@ -0,0 +1,11 @@
# NOTE: some of these values are overwritten in CI!
# NOTE: if you want to override this for your local machine, set overrides in ~/.gradle/gradle.properties
org.gradle.parallel=true
org.gradle.caching=true
# Note, this might have issues on the normal Github runner.
org.gradle.vfs.watch=true
# Tune # of cores Gradle uses.
# org.gradle.workers.max=3

Binary file not shown.

View File

@@ -0,0 +1,7 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-8.6-bin.zip
networkTimeout=10000
validateDistributionUrl=true
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists

249
airbyte-cdk/python/gradlew vendored Executable file
View File

@@ -0,0 +1,249 @@
#!/bin/sh
#
# Copyright © 2015-2021 the original authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
##############################################################################
#
# Gradle start up script for POSIX generated by Gradle.
#
# Important for running:
#
# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is
# noncompliant, but you have some other compliant shell such as ksh or
# bash, then to run this script, type that shell name before the whole
# command line, like:
#
# ksh Gradle
#
# Busybox and similar reduced shells will NOT work, because this script
# requires all of these POSIX shell features:
# * functions;
# * expansions «$var», «${var}», «${var:-default}», «${var+SET}»,
# «${var#prefix}», «${var%suffix}», and «$( cmd )»;
# * compound commands having a testable exit status, especially «case»;
# * various built-in commands including «command», «set», and «ulimit».
#
# Important for patching:
#
# (2) This script targets any POSIX shell, so it avoids extensions provided
# by Bash, Ksh, etc; in particular arrays are avoided.
#
# The "traditional" practice of packing multiple parameters into a
# space-separated string is a well documented source of bugs and security
# problems, so this is (mostly) avoided, by progressively accumulating
# options in "$@", and eventually passing that to Java.
#
# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS,
# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly;
# see the in-line comments for details.
#
# There are tweaks for specific operating systems such as AIX, CygWin,
# Darwin, MinGW, and NonStop.
#
# (3) This script is generated from the Groovy template
# https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
# within the Gradle project.
#
# You can find Gradle at https://github.com/gradle/gradle/.
#
##############################################################################
# Attempt to set APP_HOME
# Resolve links: $0 may be a link
app_path=$0
# Need this for daisy-chained symlinks.
while
APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path
[ -h "$app_path" ]
do
ls=$( ls -ld "$app_path" )
link=${ls#*' -> '}
case $link in #(
/*) app_path=$link ;; #(
*) app_path=$APP_HOME$link ;;
esac
done
# This is normally unused
# shellcheck disable=SC2034
APP_BASE_NAME=${0##*/}
# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036)
APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit
# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD=maximum
warn () {
echo "$*"
} >&2
die () {
echo
echo "$*"
echo
exit 1
} >&2
# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "$( uname )" in #(
CYGWIN* ) cygwin=true ;; #(
Darwin* ) darwin=true ;; #(
MSYS* | MINGW* ) msys=true ;; #(
NONSTOP* ) nonstop=true ;;
esac
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
# IBM's JDK on AIX uses strange locations for the executables
JAVACMD=$JAVA_HOME/jre/sh/java
else
JAVACMD=$JAVA_HOME/bin/java
fi
if [ ! -x "$JAVACMD" ] ; then
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
else
JAVACMD=java
if ! command -v java >/dev/null 2>&1
then
die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
fi
# Increase the maximum file descriptors if we can.
if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
case $MAX_FD in #(
max*)
# In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked.
# shellcheck disable=SC2039,SC3045
MAX_FD=$( ulimit -H -n ) ||
warn "Could not query maximum file descriptor limit"
esac
case $MAX_FD in #(
'' | soft) :;; #(
*)
# In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked.
# shellcheck disable=SC2039,SC3045
ulimit -n "$MAX_FD" ||
warn "Could not set maximum file descriptor limit to $MAX_FD"
esac
fi
# Collect all arguments for the java command, stacking in reverse order:
# * args from the command line
# * the main class name
# * -classpath
# * -D...appname settings
# * --module-path (only if needed)
# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables.
# For Cygwin or MSYS, switch paths to Windows format before running java
if "$cygwin" || "$msys" ; then
APP_HOME=$( cygpath --path --mixed "$APP_HOME" )
CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" )
JAVACMD=$( cygpath --unix "$JAVACMD" )
# Now convert the arguments - kludge to limit ourselves to /bin/sh
for arg do
if
case $arg in #(
-*) false ;; # don't mess with options #(
/?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath
[ -e "$t" ] ;; #(
*) false ;;
esac
then
arg=$( cygpath --path --ignore --mixed "$arg" )
fi
# Roll the args list around exactly as many times as the number of
# args, so each arg winds up back in the position where it started, but
# possibly modified.
#
# NB: a `for` loop captures its iteration list before it begins, so
# changing the positional parameters here affects neither the number of
# iterations, nor the values presented in `arg`.
shift # remove old arg
set -- "$@" "$arg" # push replacement arg
done
fi
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
# Collect all arguments for the java command:
# * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments,
# and any embedded shellness will be escaped.
# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be
# treated as '${Hostname}' itself on the command line.
set -- \
"-Dorg.gradle.appname=$APP_BASE_NAME" \
-classpath "$CLASSPATH" \
org.gradle.wrapper.GradleWrapperMain \
"$@"
# Stop when "xargs" is not available.
if ! command -v xargs >/dev/null 2>&1
then
die "xargs is not available"
fi
# Use "xargs" to parse quoted args.
#
# With -n1 it outputs one arg per line, with the quotes and backslashes removed.
#
# In Bash we could simply go:
#
# readarray ARGS < <( xargs -n1 <<<"$var" ) &&
# set -- "${ARGS[@]}" "$@"
#
# but POSIX shell has neither arrays nor command substitution, so instead we
# post-process each arg (as a line of input to sed) to backslash-escape any
# character that might be a shell metacharacter, then use eval to reverse
# that process (while maintaining the separation between arguments), and wrap
# the whole thing up as a single "set" statement.
#
# This will of course break if any of these variables contains a newline or
# an unmatched quote.
#
eval "set -- $(
printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
xargs -n1 |
sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
tr '\n' ' '
)" '"$@"'
exec "$JAVACMD" "$@"

92
airbyte-cdk/python/gradlew.bat vendored Normal file
View File

@@ -0,0 +1,92 @@
@rem
@rem Copyright 2015 the original author or authors.
@rem
@rem Licensed under the Apache License, Version 2.0 (the "License");
@rem you may not use this file except in compliance with the License.
@rem You may obtain a copy of the License at
@rem
@rem https://www.apache.org/licenses/LICENSE-2.0
@rem
@rem Unless required by applicable law or agreed to in writing, software
@rem distributed under the License is distributed on an "AS IS" BASIS,
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@rem
@if "%DEBUG%"=="" @echo off
@rem ##########################################################################
@rem
@rem Gradle startup script for Windows
@rem
@rem ##########################################################################
@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal
set DIRNAME=%~dp0
if "%DIRNAME%"=="" set DIRNAME=.
@rem This is normally unused
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%
@rem Resolve any "." and ".." in APP_HOME to make it shorter.
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome
set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if %ERRORLEVEL% equ 0 goto execute
echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.
goto fail
:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
if exist "%JAVA_EXE%" goto execute
echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.
goto fail
:execute
@rem Setup the command line
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
:end
@rem End local scope for the variables with windows NT shell
if %ERRORLEVEL% equ 0 goto mainEnd
:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
set EXIT_CODE=%ERRORLEVEL%
if %EXIT_CODE% equ 0 set EXIT_CODE=1
if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE%
exit /b %EXIT_CODE%
:mainEnd
if "%OS%"=="Windows_NT" endlocal
:omega

View File

@@ -6,3 +6,39 @@ requires = [
]
build-backend = "setuptools.build_meta"
[tool.coverage.report]
fail_under = 0
skip_empty = true
sort = "-cover"
omit = [
".venv/*",
"main.py",
"setup.py",
"unit_tests/*",
"integration_tests/*",
"**/generated/*",
]
[tool.flake8]
extend-exclude = [
"*/lib/*/site-packages",
".venv",
"build",
"models",
".eggs",
"airbyte-cdk/python/airbyte_cdk/models/__init__.py",
"airbyte-cdk/python/airbyte_cdk/sources/declarative/models/__init__.py",
".tox",
"airbyte_api_client",
"**/generated/*",
]
max-complexity = 20
max-line-length = 140
extend-ignore = [
"E203", # whitespace before ':' (conflicts with Black)
"E231", # missing whitespace after ',' / ':' (conflicts with Black)
"E501", # line too long (conflicts with Black)
"W503", # line break before binary operator (conflicts with Black)
]

View File

@@ -0,0 +1,29 @@
import com.gradle.scan.plugin.PublishedBuildScan

// Gradle settings for the standalone python CDK build.
pluginManagement {
    repositories {
        // # Gradle looks for dependency artifacts in repositories listed in 'repositories' blocks in descending order.
        gradlePluginPortal()
    }
}
// Configure the gradle enterprise plugin to enable build scans. Enabling the plugin at the top of the settings file allows the build scan to record
// as much information as possible.
plugins {
    id "com.gradle.enterprise" version "3.15.1"
}
// True when running on a CI server; used below to tweak build-scan upload behavior.
ext.isCiServer = System.getenv().containsKey("CI")
gradleEnterprise {
    buildScan {
        termsOfServiceUrl = "https://gradle.com/terms-of-service"
        termsOfServiceAgree = "yes"
        uploadInBackground = !isCiServer // Disable in CI or scan URLs may not work.
        // Record each published scan's ID and URL in a local journal for later reference.
        buildScanPublished { PublishedBuildScan scan ->
            file("scan-journal.log") << "${new Date()} - ${scan.buildScanId} - ${scan.buildScanUri}\n"
        }
    }
}
rootProject.name = 'airbyte-cdk-python'

View File

@@ -1,6 +1,5 @@
plugins {
id 'application'
id 'airbyte-docker-legacy'
}
application {

View File

@@ -1,6 +1,5 @@
plugins {
id 'application'
id 'airbyte-docker-legacy'
}
application {

View File

@@ -1,23 +0,0 @@
import java.nio.file.Paths
/**
 * Static helpers for reading docker image metadata out of a Dockerfile's LABEL directives.
 */
class DockerHelpers {
    /**
     * Returns the value of the first LABEL line in the Dockerfile whose text contains labelName.
     *
     * @param dockerFile path to the Dockerfile
     * @param labelName  label key to look up, e.g. "io.airbyte.name"
     * @throws IndexOutOfBoundsException if no matching LABEL line exists
     */
    static String extractLabelValue(String dockerFile, String labelName) {
        def file = dockerFile instanceof File ? dockerFile : new File(dockerFile)
        return file.readLines()
                .grep({ it.startsWith('LABEL') && it.contains(labelName) })
                .get(0)
                .split('=', 2)[1] // split on the first '=' only: the label value itself may contain '='
    }

    /** Image name (e.g. "airbyte/source-foo") from the io.airbyte.name label. */
    static String extractImageName(String dockerFile) {
        return extractLabelValue(dockerFile, "io.airbyte.name")
    }

    /** Image version (e.g. "0.1.0") from the io.airbyte.version label. */
    static String extractImageVersion(String dockerFile) {
        return extractLabelValue(dockerFile, "io.airbyte.version")
    }

    /** Dev-tagged image name ("name:dev") for the Dockerfile in the given project directory. */
    static String getDevTaggedImage(projectDir, dockerfileName) {
        return "${extractImageName(Paths.get(projectDir.absolutePath, dockerfileName).toString())}:dev"
    }
}

View File

@@ -1,331 +0,0 @@
import java.nio.file.Paths
import java.security.MessageDigest
import java.util.concurrent.ConcurrentHashMap
import org.apache.commons.text.StringSubstitutor
import org.gradle.api.DefaultTask
import org.gradle.api.GradleException
import org.gradle.api.Plugin
import org.gradle.api.Project
import org.gradle.api.file.ConfigurableFileTree
import org.gradle.api.file.FileCollection
import org.gradle.api.tasks.CacheableTask
import org.gradle.api.tasks.Input
import org.gradle.api.tasks.InputFile
import org.gradle.api.tasks.InputFiles
import org.gradle.api.tasks.OutputFile
import org.gradle.api.tasks.PathSensitive
import org.gradle.api.tasks.PathSensitivity
import org.gradle.api.tasks.TaskAction
/**
* AirbyteDockerLegacyTask is the task which builds a docker image based on a Dockerfile.
*
* It and the other classes in this file have "Legacy" in their name because we want to get rid of this plugin in favor
* of dagger-pipeline-based tooling like `airbyte-ci`. As of the time of this writing this is already the case for
* connectors. There are still a few remaining usages outside of connectors and they are useful to support a smooth
* local java-centric development experience with gradle, especially around integration tests.
*
* Issue https://github.com/airbytehq/airbyte/issues/30708 tracks the complete removal of this plugin.
*/
// Cacheable: with identical inputs (files, Dockerfile, base image hashes) the docker build can be skipped.
@CacheableTask
abstract class AirbyteDockerLegacyTask extends DefaultTask {
    // Superset of the project files COPYed into the image (project tree filtered by .dockerignore;
    // see AirbyteDockerLegacyTaskFactory.filteredProjectFiles in this file).
    @InputFiles
    @PathSensitive(PathSensitivity.RELATIVE)
    FileCollection filesInDockerImage
    // Mapping of each base image referenced by the Dockerfile to its docker image hash,
    // so the task re-runs when any base image changes.
    @Input
    Map<String, String> baseImageHashes
    // The Dockerfile to build.
    @InputFile
    @PathSensitive(PathSensitivity.RELATIVE)
    File dockerFile
    // Output file into which the hash (ID) of the built image is written.
    @OutputFile
    File idFileOutput
    // Shared shell script that performs the actual docker build.
    @InputFile
    @PathSensitive(PathSensitivity.RELATIVE)
    File buildScript = project.rootProject.file('tools/bin/build_image.sh')
    // Invokes the build script with: repo root, project dir, Dockerfile name,
    // the dev-tagged image name, and the path of the ID file to produce.
    @TaskAction
    def dockerTask() {
        project.exec {
            commandLine(
                buildScript.absolutePath,
                project.rootDir.absolutePath,
                project.projectDir.absolutePath,
                dockerFile.name,
                DockerHelpers.getDevTaggedImage(project.projectDir, dockerFile.name),
                idFileOutput.absolutePath,
            )
        }
    }
}
/**
* AirbyteDockerLegacyTaskFactory is a convenience object to avoid passing the current project around.
*/
class AirbyteDockerLegacyTaskFactory {
private AirbyteDockerLegacyTaskFactory() {}
Project project
String dockerFileName
File dockerFile() {
return project.file(dockerFileName)
}
// This hash of the full path to the Dockerfile is the name of the task's output file.
String dockerfilePathHash() {
return MessageDigest.getInstance("MD5")
.digest(dockerFile().absolutePath.getBytes())
.encodeHex()
.toString()
}
// A superset of the files which are COPYed into the image, defined as the project file set
// with the .dockerignore rules applied to it.
// We could be more precise by parsing the Dockerfile but this is good enough in practice.
FileCollection filteredProjectFiles() {
ConfigurableFileTree files = project.fileTree(project.projectDir)
def dockerignore = project.file('.dockerignore')
if (!dockerignore.exists()) {
return files.filter {
file -> !file.toString().contains(".venv")
}
}
for (def rule : dockerignore.readLines()) {
if (rule.startsWith("#")) {
continue
}
rule = rule.trim()
files = (rule.startsWith("!") ? files.include(rule.substring(1)) : files.exclude(rule)) as ConfigurableFileTree
}
return files
}
// Queries docker for all images and their hashes.
static synchronized Map<String,String> collectKnownImageHashes(Project project) {
def stdout = new ByteArrayOutputStream()
project.rootProject.exec {
commandLine "docker", "images", "--no-trunc", "-f", "dangling=false", "--format", "{{.Repository}}:{{.Tag}} {{.ID}}"
standardOutput = stdout
}
Map<String,String> map = [:]
stdout.toString().eachLine {line ->
def splits = line.split()
map.put(splits[0], splits[1].trim())
}
return map
}
// Query all docker images at most once for all tasks, at task creation time.
static def lazyImageHashesAtTaskCreationTime = new LazyImageHashesCache()
static class LazyImageHashesCache {
private Map<String, String> lazyValue
synchronized Map<String, String> get(Project project) {
if (lazyValue == null) {
lazyValue = collectKnownImageHashes(project)
}
return lazyValue
}
}
// Global mapping of tagged image name to gradle project.
// This is populated at configuration time and accessed at task creation time.
// All keys verify isTaggedImageOwnedByThisRepo.
static def taggedImageToProject = new ConcurrentHashMap<String,Project>()
static boolean isTaggedImageOwnedByThisRepo(String taggedImage) {
if (!taggedImage.startsWith("airbyte/")) {
// Airbyte's docker images are all prefixed like this.
// Anything not with this prefix is therefore not owned by this repo.
return false
}
if (taggedImage.startsWith("airbyte/base-airbyte-protocol-python:")) {
// Special case: this image is not built by this repo.
return false
}
if (!taggedImage.endsWith(":dev")) {
// Special case: this image is owned by this repo but built separate. e.g. source-file-secure
return false
}
// Otherwise, assume the image is built by this repo.
return true
}
// Returns a mapping of each base image referenced in the Dockerfile to the corresponding hash
// in the results of collectKnownImageHashes(). If no hash was found, map to "???" instead.
Map<String, String> baseTaggedImagesAndHashes(Map<String, String> allKnownImageHashes) {
def taggedImages = new HashSet<String>()
// Look for "FROM foo AS bar" directives, and add them to the map with .put("bar", "foo")
Map<String, String> imageAliases = [:]
dockerFile().eachLine { line ->
def parts = line.split()
if (parts.length >= 4 && parts[0].equals("FROM") && parts[parts.length - 2].equals("AS")) {
imageAliases.put(parts[parts.length - 1], parts[1])
}
}
dockerFile().eachLine { line ->
if (line.startsWith("FROM ")) {
def image = line.split()[1]
assert !image.isEmpty()
taggedImages.add(image)
} else if (line.startsWith("COPY --from=")) {
def image = line.substring("COPY --from=".length()).split()[0]
assert !image.isEmpty()
if (imageAliases[image] != null) {
taggedImages.add(imageAliases[image])
} else {
taggedImages.add(image)
}
}
}
Map<String, String> result = [:]
for (def taggedImage : taggedImages) {
// Some image tags rely on environment variables (e.g. "FROM amazoncorretto:${JDK_VERSION}").
taggedImage = new StringSubstitutor(System.getenv()).replace(taggedImage).trim()
result.put(taggedImage, allKnownImageHashes.getOrDefault(taggedImage, "???"))
}
return result
}
// Create the task lazily: we shouldn't invoke 'docker' unless the task is created as part of the build.
def createTask(String taskName) {
if (!dockerFile().exists()) {
// This might not actually be necessary. It doesn't seem harmful either.
return project.tasks.register(taskName) {
logger.info "Skipping ${taskName} because ${dockerFile()} does not exist."
}
}
// Tagged name of the image to be built by this task.
def taggedImage = DockerHelpers.getDevTaggedImage(project.projectDir, dockerFileName)
// Map this project to the tagged name of the image built by this task.
taggedImageToProject.put(taggedImage, project)
// Path to the ID file to be generated by this task.
// The ID file contains the hash of the image.
def idFilePath = Paths.get(project.rootProject.rootDir.absolutePath, '.dockerversions', dockerfilePathHash())
// Register the task (lazy creation).
def airbyteDockerTask = project.tasks.register(taskName, AirbyteDockerLegacyTask) { task ->
// Set inputs.
task.filesInDockerImage = filteredProjectFiles()
task.dockerFile = this.dockerFile()
task.baseImageHashes = baseTaggedImagesAndHashes(lazyImageHashesAtTaskCreationTime.get(project))
// Set dependencies on base images built by this repo.
for (String taggedImageDependency : task.baseImageHashes.keySet()) {
if (isTaggedImageOwnedByThisRepo(taggedImageDependency)) {
task.logger.info("adding airbyteDocker task dependency: image ${taggedImage} is based on ${taggedImageDependency}")
def dependentProject = taggedImageToProject.get(taggedImageDependency)
if (dependentProject == null) {
throw new GradleException("no known project for image ${taggedImageDependency}")
}
// Depend on 'assemble' instead of 'airbyteDocker' or 'airbyteDockerTest', it's simpler that way.
task.dependsOn(dependentProject.tasks.named('assemble'))
}
}
// Set outputs.
task.idFileOutput = idFilePath.toFile()
task.outputs.upToDateWhen {
// Because the baseImageHashes is computed at task creation time, it may be stale
// at task execution time. Let's double-check.
// Missing dependency declarations in the gradle build may result in the airbyteDocker tasks
// to be created in the wrong order. Not worth breaking the build over.
for (Map.Entry<String, String> e : task.baseImageHashes) {
if (isTaggedImageOwnedByThisRepo(e.key) && e.value == "???") {
task.logger.info "Not up to date: missing at least one airbyte base image in docker"
return false
}
}
// Fetch the hashes of the required based images anew.
def allImageHashes = collectKnownImageHashes(task.project)
// If the image to be built by this task doesn't exist in docker, then it definitely should
// be built regardless of the status of the ID file.
// For instance, it's possible that a `docker image rm` occurred between consecutive
// identical gradle builds: the ID file remains untouched but the image still needs to be rebuilt.
if (!allImageHashes.containsKey(taggedImage)) {
task.logger.info "Not up to date: ID file exists but target image not found in docker"
return false
}
// If the depended-upon base images have changed in the meantime, then it follows that the target
// image needs to be rebuilt regardless of the status of the ID file.
def currentBaseImageHashes = baseTaggedImagesAndHashes(allImageHashes)
if (!task.baseImageHashes.equals(currentBaseImageHashes)) {
task.logger.info "Not up to date: at last one base image has changed in docker since task creation"
return false
}
// In all other cases, if the ID file hasn't been touched, then the task can be skipped.
return true
}
}
airbyteDockerTask.configure {
// Images for java projects always rely on the distribution tarball.
dependsOn project.tasks.matching { it.name == 'distTar' }
// Ensure that all files exist beforehand.
dependsOn project.tasks.matching { it.name == 'generate' }
}
project.tasks.named('assemble').configure {
// We may revisit the dependency on assemble but the dependency should always be on a base task.
dependsOn airbyteDockerTask
}
// Add a task to clean up when doing a gradle clean.
// Don't actually mess with docker, just delete the output file.
def airbyteDockerCleanTask = project.tasks.register(taskName + "Clean", Delete) {
delete idFilePath
}
project.tasks.named('clean').configure {
dependsOn airbyteDockerCleanTask
}
return airbyteDockerTask
}
static def build(Project project, String taskName, String dockerFileName) {
def f = new AirbyteDockerLegacyTaskFactory()
f.project = project
f.dockerFileName = dockerFileName
f.createTask(taskName)
}
}
/**
* AirbyteDockerLegacyPlugin creates an airbyteDocker task for the project when a Dockerfile is present.
*
* Following the same logic, it creates airbyteDockerTest when Dockerfile.test is present, though
* that behavior is not used anywhere except in the source-mongo connector and is therefore deprecated
* through the use of airbyte-ci.
*/
/**
 * Registers docker build tasks for a project based on which Dockerfiles it ships.
 */
class AirbyteDockerLegacyPlugin implements Plugin<Project> {
    // One dedicated docker task per base-normalization database flavor,
    // keyed by task name, valued by the Dockerfile prefix.
    private static final Map<String, String> NORMALIZATION_IMAGES = [
        'airbyteDockerMSSql'     : 'mssql',
        'airbyteDockerMySql'     : 'mysql',
        'airbyteDockerOracle'    : 'oracle',
        'airbyteDockerClickhouse': 'clickhouse',
        'airbyteDockerSnowflake' : 'snowflake',
        'airbyteDockerRedshift'  : 'redshift',
        'airbyteDockerTiDB'      : 'tidb',
        'airbyteDockerDuckDB'    : 'duckdb'
    ]

    void apply(Project project) {
        // Every project gets the plain airbyteDocker task for its Dockerfile (if present).
        AirbyteDockerLegacyTaskFactory.build(project, 'airbyteDocker', 'Dockerfile')
        def projectName = project.name
        if (projectName.endsWith('source-mongodb')) {
            // Used only for source-mongodb. Consider removing entirely.
            AirbyteDockerLegacyTaskFactory.build(project, 'airbyteDockerTest', 'Dockerfile.test')
        }
        if (projectName.endsWith('base-normalization')) {
            // Used for base-normalization.
            NORMALIZATION_IMAGES.each { taskName, customConnector ->
                AirbyteDockerLegacyTaskFactory.build(project, taskName, "${customConnector}.Dockerfile")
            }
        }
    }
}

View File

@@ -1,185 +0,0 @@
import groovy.io.FileType
import groovy.io.FileVisitResult
import org.gradle.api.GradleException
import org.gradle.api.Plugin
import org.gradle.api.Project
import org.gradle.api.tasks.Exec
import ru.vyarus.gradle.plugin.python.task.PythonTask
/**
 * Static helpers shared by the python gradle plugin classes.
 */
class Helpers {
    /**
     * Registers a pytest-with-coverage task named taskName for testFilesDirectory,
     * but only if that directory actually contains test files.
     *
     * This check is needed because pytest exits with code 5 when it collects no tests, which
     * gradle interprets as failure. Without it, every module with no unit or integration tests
     * would need a dummy test file like:
     *
     *   def make_ci_pass_test():
     *       assert True
     *
     * We leverage pytest's discovery rules
     * (https://docs.pytest.org/en/6.2.x/goodpractices.html#conventions-for-python-test-discovery):
     * a directory is considered to have tests iff it contains a file named test_*.py or *_test.py.
     * See https://github.com/airbytehq/airbyte/issues/4979 for the original context.
     *
     * NOTE: the original implementation carried this documentation as a bare triple-quoted
     * string expression, which Groovy evaluated (and discarded) on every call; it is now a
     * proper comment.
     */
    static addTestTaskIfTestFilesFound(Project project, String testFilesDirectory, String taskName, taskDependencies) {
        boolean requiresTasks = false
        if (project.file(testFilesDirectory).exists()) {
            def testDir = project.projectDir.toPath().resolve(testFilesDirectory)
            testDir.traverse(type: FileType.FILES, nameFilter: ~/(^test_.*|.*_test)\.py$/) { file ->
                requiresTasks = true
                // If a file is found, terminate the traversal, thus causing this task to be declared at most once
                return FileVisitResult.TERMINATE
            }
        }
        if (!requiresTasks) {
            return
        }
        def coverageTask = project.tasks.register(taskName, PythonTask) {
            def dataFile = "${testFilesDirectory}/.coverage.${taskName}"
            def rcFile = project.rootProject.file('pyproject.toml').absolutePath
            // Prefer a module-local pytest.ini; otherwise fall back to the root pyproject.toml.
            def testConfig = project.file('pytest.ini').exists() ? 'pytest.ini' : project.rootProject.file('pyproject.toml').absolutePath
            module = "coverage"
            command = "run --data-file=${dataFile} --rcfile=${rcFile} -m pytest -s ${testFilesDirectory} -c ${testConfig}"
        }
        coverageTask.configure {
            dependsOn taskDependencies
        }
    }
}
/**
 * AirbytePythonPlugin wires a python project into gradle via the 'ru.vyarus.use-python' plugin:
 * it provisions a virtualenv, installs the project's requirements, and registers lint (flake8),
 * unit-test and integration-test tasks hooked into the standard 'check' and 'build' lifecycle.
 */
class AirbytePythonPlugin implements Plugin<Project> {

    void apply(Project project) {
        def venvDirectoryName = '.venv'

        // Add a task that allows cleaning up venvs to every python project
        def cleanPythonVenv = project.tasks.register('cleanPythonVenv', Exec) {
            commandLine 'rm'
            args '-rf', "${project.projectDir.absolutePath}/${venvDirectoryName}"
        }
        project.tasks.named('clean').configure {
            dependsOn cleanPythonVenv
        }

        project.plugins.apply 'ru.vyarus.use-python'

        // Configure gradle python plugin.
        project.python {
            envPath = venvDirectoryName
            minPythonVersion '3.10'

            // Amazon Linux support.
            // The airbyte-ci tool runs gradle tasks in AL2023-based containers.
            // In AL2023, `python3` is necessarily v3.9, and later pythons need to be installed and named explicitly.
            // See https://github.com/amazonlinux/amazon-linux-2023/issues/459 for details.
            try {
                if ("python3.11 --version".execute().waitFor() == 0) {
                    // python3.11 definitely exists at this point, use it instead of 'python3'.
                    pythonBinary "python3.11"
                }
            } catch (IOException _) {
                // Swallow exception if python3.11 is not installed.
            }
            // Pyenv support.
            try {
                def pyenvRoot = "pyenv root".execute()
                def pyenvLatest = "pyenv latest ${minPythonVersion}".execute()
                // Pyenv definitely exists at this point: use 'python' instead of 'python3' in all cases.
                pythonBinary "python"
                if (pyenvRoot.waitFor() == 0 && pyenvLatest.waitFor() == 0) {
                    // Point the plugin at the pyenv-managed interpreter directory.
                    pythonPath "${pyenvRoot.text.trim()}/versions/${pyenvLatest.text.trim()}/bin"
                }
            } catch (IOException _) {
                // Swallow exception if pyenv is not installed.
            }
            scope 'VIRTUALENV'
            installVirtualenv = true
            // Pinned tool versions installed into the virtualenv.
            pip 'pip:23.2.1'
            pip 'mccabe:0.6.1'
            // https://github.com/csachs/pyproject-flake8/issues/13
            pip 'flake8:4.0.1'
            // flake8 doesn't support pyproject.toml files
            // and thus there is the wrapper "pyproject-flake8" for this
            pip 'pyproject-flake8:0.0.1a2'
            pip 'pytest:6.2.5'
            pip 'coverage[toml]:6.3.1'
        }

        // Attempt to install anything in requirements.txt.
        // By convention this should only be dependencies whose source is located in the project.
        if (project.file('requirements.txt').exists()) {
            project.tasks.register('installLocalReqs', PythonTask) {
                module = "pip"
                command = "install -r requirements.txt"
                inputs.file('requirements.txt')
                outputs.file('build/installedlocalreqs.txt')
            }
        } else if (project.file('setup.py').exists()) {
            // If requirements.txt does not exists, install from setup.py instead, assume a dev or "tests" profile exists.
            // In this case, there is no need to depend on the base python modules since everything should be contained in the setup.py.
            project.tasks.register('installLocalReqs', PythonTask) {
                module = "pip"
                command = "install .[dev,tests]"
                inputs.file('setup.py')
                outputs.file('build/installedlocalreqs.txt')
            }
        } else {
            // Neither requirements.txt nor setup.py: nothing further to configure for this project.
            return
        }

        def installLocalReqs = project.tasks.named('installLocalReqs')

        // Lint task; flake8 is invoked via the pyproject-flake8 wrapper ('pflake8') so that the
        // root pyproject.toml can hold its configuration.
        def flakeCheck = project.tasks.register('flakeCheck', PythonTask) {
            module = "pflake8"
            command = "--config ${project.rootProject.file('pyproject.toml').absolutePath} ./"
        }

        def installReqs = project.tasks.register('installReqs', PythonTask) {
            module = "pip"
            command = "install .[main]"
            inputs.file('setup.py')
            outputs.file('build/installedreqs.txt')
        }
        installReqs.configure {
            dependsOn installLocalReqs
        }

        project.tasks.named('check').configure {
            dependsOn installReqs
            dependsOn flakeCheck
        }

        def installTestReqs = project.tasks.register('installTestReqs', PythonTask) {
            module = "pip"
            command = "install .[tests]"
            inputs.file('setup.py')
            outputs.file('build/installedtestreqs.txt')
        }
        installTestReqs.configure {
            dependsOn installReqs
        }

        // Register 'testPython' only when unit_tests contains discoverable pytest files.
        Helpers.addTestTaskIfTestFilesFound(project, 'unit_tests', 'testPython', installTestReqs)
        // tasks.matching is used because 'testPython' may not have been registered above.
        project.tasks.named('check').configure {
            dependsOn project.tasks.matching { it.name == 'testPython' }
        }

        // Register 'integrationTestPython' only when integration_tests contains discoverable pytest files.
        Helpers.addTestTaskIfTestFilesFound(project, 'integration_tests', 'integrationTestPython', installTestReqs)
        def integrationTestTasks = project.tasks.matching { it.name == 'integrationTestPython' }
        integrationTestTasks.configureEach {
            dependsOn project.tasks.named('assemble')
            // Integration tests run after (never instead of) the faster 'check' tasks.
            mustRunAfter project.tasks.named('check')
        }
        project.tasks.named('build').configure {
            dependsOn integrationTestTasks
        }
    }
}

View File

@@ -159,9 +159,6 @@ if (isCiServer || isAirbyteCI) {
rootProject.name = 'airbyte'
include ':tools:code-generator'
include ':airbyte-cdk:python'
include ':airbyte-cdk:java:airbyte-cdk'
include ':airbyte-cdk:java:airbyte-cdk:dependencies'
include ':airbyte-cdk:java:airbyte-cdk:core'

View File

@@ -1,3 +0,0 @@
// Apply the legacy docker plugin, which registers an 'airbyteDocker' image-build task
// for this project when a Dockerfile is present.
plugins {
    id 'airbyte-docker-legacy'
}