1
0
mirror of synced 2025-12-19 18:14:56 -05:00

gradle: split off python cdk (#35306)

This commit is contained in:
Marius Posta
2024-02-16 04:39:14 -08:00
committed by GitHub
parent b741045f50
commit dc088bc3a9
18 changed files with 551 additions and 554 deletions

View File

@@ -1,23 +0,0 @@
import java.nio.file.Paths
class DockerHelpers {
static String extractLabelValue(String dockerFile, String labelName) {
def file = dockerFile instanceof File ? dockerFile : new File(dockerFile)
return file.readLines()
.grep({ it.startsWith('LABEL') && it.contains(labelName) })
.get(0)
.split('=')[1]
}
static String extractImageName(String dockerFile) {
return extractLabelValue(dockerFile, "io.airbyte.name")
}
static String extractImageVersion(String dockerFile) {
return extractLabelValue(dockerFile, "io.airbyte.version")
}
static String getDevTaggedImage(projectDir, dockerfileName) {
return "${extractImageName(Paths.get(projectDir.absolutePath, dockerfileName).toString())}:dev"
}
}

View File

@@ -1,331 +0,0 @@
import java.nio.file.Paths
import java.security.MessageDigest
import java.util.concurrent.ConcurrentHashMap
import org.apache.commons.text.StringSubstitutor
import org.gradle.api.DefaultTask
import org.gradle.api.GradleException
import org.gradle.api.Plugin
import org.gradle.api.Project
import org.gradle.api.file.ConfigurableFileTree
import org.gradle.api.file.FileCollection
import org.gradle.api.tasks.CacheableTask
import org.gradle.api.tasks.Input
import org.gradle.api.tasks.InputFile
import org.gradle.api.tasks.InputFiles
import org.gradle.api.tasks.OutputFile
import org.gradle.api.tasks.PathSensitive
import org.gradle.api.tasks.PathSensitivity
import org.gradle.api.tasks.TaskAction
/**
* AirbyteDockerLegacyTask is the task which builds a docker image based on a Dockerfile.
*
* It and the other classes in this file have "Legacy" in their name because we want to get rid of this plugin in favor
* of dagger-pipeline-based tooling like `airbyte-ci`. As of the time of this writing this is already the case for
* connectors. There are still a few remaining usages outside of connectors and they are useful to support a smooth
* local java-centric development experience with gradle, especially around integration tests.
*
* Issue https://github.com/airbytehq/airbyte/issues/30708 tracks the complete removal of this plugin.
*/
@CacheableTask
abstract class AirbyteDockerLegacyTask extends DefaultTask {
@InputFiles
@PathSensitive(PathSensitivity.RELATIVE)
FileCollection filesInDockerImage
@Input
Map<String, String> baseImageHashes
@InputFile
@PathSensitive(PathSensitivity.RELATIVE)
File dockerFile
@OutputFile
File idFileOutput
@InputFile
@PathSensitive(PathSensitivity.RELATIVE)
File buildScript = project.rootProject.file('tools/bin/build_image.sh')
@TaskAction
def dockerTask() {
project.exec {
commandLine(
buildScript.absolutePath,
project.rootDir.absolutePath,
project.projectDir.absolutePath,
dockerFile.name,
DockerHelpers.getDevTaggedImage(project.projectDir, dockerFile.name),
idFileOutput.absolutePath,
)
}
}
}
/**
* AirbyteDockerLegacyTaskFactory is a convenience object to avoid passing the current project around.
*/
class AirbyteDockerLegacyTaskFactory {
private AirbyteDockerLegacyTaskFactory() {}
Project project
String dockerFileName
File dockerFile() {
return project.file(dockerFileName)
}
// This hash of the full path to the Dockerfile is the name of the task's output file.
String dockerfilePathHash() {
return MessageDigest.getInstance("MD5")
.digest(dockerFile().absolutePath.getBytes())
.encodeHex()
.toString()
}
// A superset of the files which are COPYed into the image, defined as the project file set
// with the .dockerignore rules applied to it.
// We could be more precise by parsing the Dockerfile but this is good enough in practice.
FileCollection filteredProjectFiles() {
ConfigurableFileTree files = project.fileTree(project.projectDir)
def dockerignore = project.file('.dockerignore')
if (!dockerignore.exists()) {
return files.filter {
file -> !file.toString().contains(".venv")
}
}
for (def rule : dockerignore.readLines()) {
if (rule.startsWith("#")) {
continue
}
rule = rule.trim()
files = (rule.startsWith("!") ? files.include(rule.substring(1)) : files.exclude(rule)) as ConfigurableFileTree
}
return files
}
// Queries docker for all images and their hashes.
static synchronized Map<String,String> collectKnownImageHashes(Project project) {
def stdout = new ByteArrayOutputStream()
project.rootProject.exec {
commandLine "docker", "images", "--no-trunc", "-f", "dangling=false", "--format", "{{.Repository}}:{{.Tag}} {{.ID}}"
standardOutput = stdout
}
Map<String,String> map = [:]
stdout.toString().eachLine {line ->
def splits = line.split()
map.put(splits[0], splits[1].trim())
}
return map
}
// Query all docker images at most once for all tasks, at task creation time.
static def lazyImageHashesAtTaskCreationTime = new LazyImageHashesCache()
static class LazyImageHashesCache {
private Map<String, String> lazyValue
synchronized Map<String, String> get(Project project) {
if (lazyValue == null) {
lazyValue = collectKnownImageHashes(project)
}
return lazyValue
}
}
// Global mapping of tagged image name to gradle project.
// This is populated at configuration time and accessed at task creation time.
// All keys verify isTaggedImageOwnedByThisRepo.
static def taggedImageToProject = new ConcurrentHashMap<String,Project>()
static boolean isTaggedImageOwnedByThisRepo(String taggedImage) {
if (!taggedImage.startsWith("airbyte/")) {
// Airbyte's docker images are all prefixed like this.
// Anything not with this prefix is therefore not owned by this repo.
return false
}
if (taggedImage.startsWith("airbyte/base-airbyte-protocol-python:")) {
// Special case: this image is not built by this repo.
return false
}
if (!taggedImage.endsWith(":dev")) {
// Special case: this image is owned by this repo but built separate. e.g. source-file-secure
return false
}
// Otherwise, assume the image is built by this repo.
return true
}
// Returns a mapping of each base image referenced in the Dockerfile to the corresponding hash
// in the results of collectKnownImageHashes(). If no hash was found, map to "???" instead.
Map<String, String> baseTaggedImagesAndHashes(Map<String, String> allKnownImageHashes) {
def taggedImages = new HashSet<String>()
// Look for "FROM foo AS bar" directives, and add them to the map with .put("bar", "foo")
Map<String, String> imageAliases = [:]
dockerFile().eachLine { line ->
def parts = line.split()
if (parts.length >= 4 && parts[0].equals("FROM") && parts[parts.length - 2].equals("AS")) {
imageAliases.put(parts[parts.length - 1], parts[1])
}
}
dockerFile().eachLine { line ->
if (line.startsWith("FROM ")) {
def image = line.split()[1]
assert !image.isEmpty()
taggedImages.add(image)
} else if (line.startsWith("COPY --from=")) {
def image = line.substring("COPY --from=".length()).split()[0]
assert !image.isEmpty()
if (imageAliases[image] != null) {
taggedImages.add(imageAliases[image])
} else {
taggedImages.add(image)
}
}
}
Map<String, String> result = [:]
for (def taggedImage : taggedImages) {
// Some image tags rely on environment variables (e.g. "FROM amazoncorretto:${JDK_VERSION}").
taggedImage = new StringSubstitutor(System.getenv()).replace(taggedImage).trim()
result.put(taggedImage, allKnownImageHashes.getOrDefault(taggedImage, "???"))
}
return result
}
// Create the task lazily: we shouldn't invoke 'docker' unless the task is created as part of the build.
def createTask(String taskName) {
if (!dockerFile().exists()) {
// This might not actually be necessary. It doesn't seem harmful either.
return project.tasks.register(taskName) {
logger.info "Skipping ${taskName} because ${dockerFile()} does not exist."
}
}
// Tagged name of the image to be built by this task.
def taggedImage = DockerHelpers.getDevTaggedImage(project.projectDir, dockerFileName)
// Map this project to the tagged name of the image built by this task.
taggedImageToProject.put(taggedImage, project)
// Path to the ID file to be generated by this task.
// The ID file contains the hash of the image.
def idFilePath = Paths.get(project.rootProject.rootDir.absolutePath, '.dockerversions', dockerfilePathHash())
// Register the task (lazy creation).
def airbyteDockerTask = project.tasks.register(taskName, AirbyteDockerLegacyTask) { task ->
// Set inputs.
task.filesInDockerImage = filteredProjectFiles()
task.dockerFile = this.dockerFile()
task.baseImageHashes = baseTaggedImagesAndHashes(lazyImageHashesAtTaskCreationTime.get(project))
// Set dependencies on base images built by this repo.
for (String taggedImageDependency : task.baseImageHashes.keySet()) {
if (isTaggedImageOwnedByThisRepo(taggedImageDependency)) {
task.logger.info("adding airbyteDocker task dependency: image ${taggedImage} is based on ${taggedImageDependency}")
def dependentProject = taggedImageToProject.get(taggedImageDependency)
if (dependentProject == null) {
throw new GradleException("no known project for image ${taggedImageDependency}")
}
// Depend on 'assemble' instead of 'airbyteDocker' or 'airbyteDockerTest', it's simpler that way.
task.dependsOn(dependentProject.tasks.named('assemble'))
}
}
// Set outputs.
task.idFileOutput = idFilePath.toFile()
task.outputs.upToDateWhen {
// Because the baseImageHashes is computed at task creation time, it may be stale
// at task execution time. Let's double-check.
// Missing dependency declarations in the gradle build may result in the airbyteDocker tasks
// to be created in the wrong order. Not worth breaking the build over.
for (Map.Entry<String, String> e : task.baseImageHashes) {
if (isTaggedImageOwnedByThisRepo(e.key) && e.value == "???") {
task.logger.info "Not up to date: missing at least one airbyte base image in docker"
return false
}
}
// Fetch the hashes of the required based images anew.
def allImageHashes = collectKnownImageHashes(task.project)
// If the image to be built by this task doesn't exist in docker, then it definitely should
// be built regardless of the status of the ID file.
// For instance, it's possible that a `docker image rm` occurred between consecutive
// identical gradle builds: the ID file remains untouched but the image still needs to be rebuilt.
if (!allImageHashes.containsKey(taggedImage)) {
task.logger.info "Not up to date: ID file exists but target image not found in docker"
return false
}
// If the depended-upon base images have changed in the meantime, then it follows that the target
// image needs to be rebuilt regardless of the status of the ID file.
def currentBaseImageHashes = baseTaggedImagesAndHashes(allImageHashes)
if (!task.baseImageHashes.equals(currentBaseImageHashes)) {
task.logger.info "Not up to date: at last one base image has changed in docker since task creation"
return false
}
// In all other cases, if the ID file hasn't been touched, then the task can be skipped.
return true
}
}
airbyteDockerTask.configure {
// Images for java projects always rely on the distribution tarball.
dependsOn project.tasks.matching { it.name == 'distTar' }
// Ensure that all files exist beforehand.
dependsOn project.tasks.matching { it.name == 'generate' }
}
project.tasks.named('assemble').configure {
// We may revisit the dependency on assemble but the dependency should always be on a base task.
dependsOn airbyteDockerTask
}
// Add a task to clean up when doing a gradle clean.
// Don't actually mess with docker, just delete the output file.
def airbyteDockerCleanTask = project.tasks.register(taskName + "Clean", Delete) {
delete idFilePath
}
project.tasks.named('clean').configure {
dependsOn airbyteDockerCleanTask
}
return airbyteDockerTask
}
static def build(Project project, String taskName, String dockerFileName) {
def f = new AirbyteDockerLegacyTaskFactory()
f.project = project
f.dockerFileName = dockerFileName
f.createTask(taskName)
}
}
/**
* AirbyteDockerLegacyPlugin creates an airbyteDocker task for the project when a Dockerfile is present.
*
* Following the same logic, it creates airbyteDockerTest when Dockerfile.test is present, though
* that behavior is not used anywhere except in the source-mongo connector and is therefore deprecated
* through the use of airbyte-ci.
*/
class AirbyteDockerLegacyPlugin implements Plugin<Project> {
void apply(Project project) {
AirbyteDockerLegacyTaskFactory.build(project, 'airbyteDocker', 'Dockerfile')
// Used only for source-mongodb. Consider removing entirely.
if (project.name.endsWith('source-mongodb')) {
AirbyteDockerLegacyTaskFactory.build(project, 'airbyteDockerTest', 'Dockerfile.test')
}
// Used for base-normalization.
if (project.name.endsWith('base-normalization')) {
['airbyteDockerMSSql' : 'mssql',
'airbyteDockerMySql' : 'mysql',
'airbyteDockerOracle' : 'oracle',
'airbyteDockerClickhouse': 'clickhouse',
'airbyteDockerSnowflake' : 'snowflake',
'airbyteDockerRedshift' : 'redshift',
'airbyteDockerTiDB' : 'tidb',
'airbyteDockerDuckDB' : 'duckdb'
].forEach {taskName, customConnector ->
AirbyteDockerLegacyTaskFactory.build(project, taskName, "${customConnector}.Dockerfile")
}
}
}
}

View File

@@ -1,185 +0,0 @@
import groovy.io.FileType
import groovy.io.FileVisitResult
import org.gradle.api.GradleException
import org.gradle.api.Plugin
import org.gradle.api.Project
import org.gradle.api.tasks.Exec
import ru.vyarus.gradle.plugin.python.task.PythonTask
class Helpers {
static addTestTaskIfTestFilesFound(Project project, String testFilesDirectory, String taskName, taskDependencies) {
"""
This method verifies if there are test files in a directory before adding the pytest task to run tests on that directory. This is needed
because if there are no tests in that dir and we run pytest on it, it exits with exit code 5 which gradle takes to mean that the process
failed, since it's non-zero. This means that if a module doesn't need a unit or integration test, it still needs to add a dummy test file
like:
```
def make_ci_pass_test():
assert True
```
So we use this method to leverage pytest's test discovery rules (https://docs.pytest.org/en/6.2.x/goodpractices.html#conventions-for-python-test-discovery)
to selectively run pytest based on whether there seem to be test files in that directory.
Namely, if the directory contains a file whose name is test_*.py or *_test.py then it's a test.
See https://github.com/airbytehq/airbyte/issues/4979 for original context
"""
boolean requiresTasks = false
if (project.file(testFilesDirectory).exists()) {
def testDir = project.projectDir.toPath().resolve(testFilesDirectory)
testDir.traverse(type: FileType.FILES, nameFilter: ~/(^test_.*|.*_test)\.py$/) {file ->
requiresTasks = true
// If a file is found, terminate the traversal, thus causing this task to be declared at most once
return FileVisitResult.TERMINATE
}
}
if (!requiresTasks) {
return
}
def coverageTask = project.tasks.register(taskName, PythonTask) {
def dataFile = "${testFilesDirectory}/.coverage.${taskName}"
def rcFile = project.rootProject.file('pyproject.toml').absolutePath
def testConfig = project.file('pytest.ini').exists() ? 'pytest.ini' : project.rootProject.file('pyproject.toml').absolutePath
module = "coverage"
command = "run --data-file=${dataFile} --rcfile=${rcFile} -m pytest -s ${testFilesDirectory} -c ${testConfig}"
}
coverageTask.configure {
dependsOn taskDependencies
}
}
}
class AirbytePythonPlugin implements Plugin<Project> {
void apply(Project project) {
def venvDirectoryName = '.venv'
// Add a task that allows cleaning up venvs to every python project
def cleanPythonVenv = project.tasks.register('cleanPythonVenv', Exec) {
commandLine 'rm'
args '-rf', "${project.projectDir.absolutePath}/${venvDirectoryName}"
}
project.tasks.named('clean').configure {
dependsOn cleanPythonVenv
}
project.plugins.apply 'ru.vyarus.use-python'
// Configure gradle python plugin.
project.python {
envPath = venvDirectoryName
minPythonVersion '3.10'
// Amazon Linux support.
// The airbyte-ci tool runs gradle tasks in AL2023-based containers.
// In AL2023, `python3` is necessarily v3.9, and later pythons need to be installed and named explicitly.
// See https://github.com/amazonlinux/amazon-linux-2023/issues/459 for details.
try {
if ("python3.11 --version".execute().waitFor() == 0) {
// python3.11 definitely exists at this point, use it instead of 'python3'.
pythonBinary "python3.11"
}
} catch (IOException _) {
// Swallow exception if python3.11 is not installed.
}
// Pyenv support.
try {
def pyenvRoot = "pyenv root".execute()
def pyenvLatest = "pyenv latest ${minPythonVersion}".execute()
// Pyenv definitely exists at this point: use 'python' instead of 'python3' in all cases.
pythonBinary "python"
if (pyenvRoot.waitFor() == 0 && pyenvLatest.waitFor() == 0) {
pythonPath "${pyenvRoot.text.trim()}/versions/${pyenvLatest.text.trim()}/bin"
}
} catch (IOException _) {
// Swallow exception if pyenv is not installed.
}
scope 'VIRTUALENV'
installVirtualenv = true
pip 'pip:23.2.1'
pip 'mccabe:0.6.1'
// https://github.com/csachs/pyproject-flake8/issues/13
pip 'flake8:4.0.1'
// flake8 doesn't support pyproject.toml files
// and thus there is the wrapper "pyproject-flake8" for this
pip 'pyproject-flake8:0.0.1a2'
pip 'pytest:6.2.5'
pip 'coverage[toml]:6.3.1'
}
// Attempt to install anything in requirements.txt.
// By convention this should only be dependencies whose source is located in the project.
if (project.file('requirements.txt').exists()) {
project.tasks.register('installLocalReqs', PythonTask) {
module = "pip"
command = "install -r requirements.txt"
inputs.file('requirements.txt')
outputs.file('build/installedlocalreqs.txt')
}
} else if (project.file('setup.py').exists()) {
// If requirements.txt does not exists, install from setup.py instead, assume a dev or "tests" profile exists.
// In this case, there is no need to depend on the base python modules since everything should be contained in the setup.py.
project.tasks.register('installLocalReqs', PythonTask) {
module = "pip"
command = "install .[dev,tests]"
inputs.file('setup.py')
outputs.file('build/installedlocalreqs.txt')
}
} else {
return
}
def installLocalReqs = project.tasks.named('installLocalReqs')
def flakeCheck = project.tasks.register('flakeCheck', PythonTask) {
module = "pflake8"
command = "--config ${project.rootProject.file('pyproject.toml').absolutePath} ./"
}
def installReqs = project.tasks.register('installReqs', PythonTask) {
module = "pip"
command = "install .[main]"
inputs.file('setup.py')
outputs.file('build/installedreqs.txt')
}
installReqs.configure {
dependsOn installLocalReqs
}
project.tasks.named('check').configure {
dependsOn installReqs
dependsOn flakeCheck
}
def installTestReqs = project.tasks.register('installTestReqs', PythonTask) {
module = "pip"
command = "install .[tests]"
inputs.file('setup.py')
outputs.file('build/installedtestreqs.txt')
}
installTestReqs.configure {
dependsOn installReqs
}
Helpers.addTestTaskIfTestFilesFound(project, 'unit_tests', 'testPython', installTestReqs)
project.tasks.named('check').configure {
dependsOn project.tasks.matching { it.name == 'testPython' }
}
Helpers.addTestTaskIfTestFilesFound(project, 'integration_tests', 'integrationTestPython', installTestReqs)
def integrationTestTasks = project.tasks.matching { it.name == 'integrationTestPython' }
integrationTestTasks.configureEach {
dependsOn project.tasks.named('assemble')
mustRunAfter project.tasks.named('check')
}
project.tasks.named('build').configure {
dependsOn integrationTestTasks
}
}
}