1
0
mirror of synced 2025-12-19 18:14:56 -05:00
Files
airbyte/build.gradle
2023-10-23 08:26:23 -07:00

710 lines
27 KiB
Groovy

import com.github.spotbugs.snom.SpotBugsTask
import ru.vyarus.gradle.plugin.python.task.PythonTask
import com.hierynomus.gradle.license.tasks.LicenseCheck
import com.hierynomus.gradle.license.tasks.LicenseFormat
// Dependencies declared inside `buildscript` are put on the classpath used to evaluate this .gradle file.
// They are distinct from the application dependencies declared further down.
// See https://stackoverflow.com/questions/17773817/purpose-of-buildscript-block-in-gradle.
buildscript {
    dependencies {
        classpath('com.diffplug.spotless:spotless-plugin-gradle:6.20.0')
        classpath('gradle.plugin.com.hierynomus.gradle.plugins:license-gradle-plugin:0.16.1')
    }
}
// Plugins applied to the root project.
plugins {
    id('base')
    id('com.github.node-gradle.node') version '3.5.1'
    id('com.github.spotbugs') version '5.0.13'
    id('version-catalog')
    id('ru.vyarus.use-python')
}
// Load the root project's gradle.properties into `env`.
// `env.VERSION` is the fallback build version when the VERSION environment variable is unset.
Properties env = new Properties()
rootProject.file('gradle.properties').withInputStream { env.load(it) }
if (!env.containsKey('VERSION')) {
    // Fail fast, naming the file that was actually inspected.
    throw new GradleException('VERSION not specified in gradle.properties file...')
}
// `version` is the application build version used for artifacts like jars.
// `image_tag` is the docker tag applied to built images.
// Both are identical when building a specific Airbyte release or branch via the 'VERSION' environment variable.
// For local development builds the 'VERSION' environment variable is unset, and built images are tagged with 'dev'.
ext {
    def versionFromEnvironment = System.getenv("VERSION")
    version = versionFromEnvironment ?: env.VERSION
    image_tag = versionFromEnvironment ?: 'dev'
    // Any value other than the literal 'false' turns slow-test skipping on.
    skipSlowTests = !'false'.equals(System.getProperty('skipSlowTests', 'false'))
}
// Pyenv support: record the output of `pyenv root` as the `pyenvRoot` extra property when the command succeeds.
try {
    def pyenvRootProcess = "pyenv root".execute()
    def exitCode = pyenvRootProcess.waitFor()
    if (exitCode == 0) {
        ext.pyenvRoot = pyenvRootProcess.text.trim()
    }
} catch (IOException ignored) {
    // pyenv is not installed: nothing to record.
}
// True when the project sits directly under a parent project named 'connectors'
// and its own name starts with "source-" or "destination-".
def isConnectorProject = { Project project ->
    def parentProject = project.parent
    boolean underConnectors = parentProject != null && parentProject.name == 'connectors'
    if (!underConnectors) {
        return false
    }
    def projectName = project.name
    return projectName.startsWith("source-") || projectName.startsWith("destination-")
}
allprojects {
    apply(plugin: 'base')
    // By default gradle uses the directory as the project name. That works well in a single-project environment,
    // but projects clobber each other in an environment with subprojects when directories are named identically.
    // The group is therefore derived from the path of the project's parent directory relative to the root.
    def parentPath = rootDir.relativePath(projectDir.parentFile)
    def sub = parentPath.replace('/', '.')
    def groupSuffix = sub.isEmpty() ? '' : ".$sub"
    group = "io.${rootProject.name}${groupSuffix}"
    project.base.archivesName = "${project.group}-${project.name}"
    version = rootProject.ext.version
}
// License generation logic.
/*
 Writes a license header to a temporary file and returns that file.

 license      - file holding the raw license text, one header line per line.
 startComment - line written before the license text (e.g. a block-comment opener).
 endComment   - line written after the license text (e.g. a block-comment closer).
 lineComment  - prefix written in front of every license line.
 isPython     - selects the python layout: empty start/end comment lines are omitted
                and one extra blank line is appended at the end.

 In java, we don't have a second linter/styling tool other than spotless so it doesn't really
 matter if we write a newline or not for startComment/endComment.
 However, in python, we are using black that double-checks and reformats the code.
 Thus, writing an extra empty newline (not removed by trimTrailingWhitespace()) is actually a
 big deal and would be reformatted (removed) because of black's specs.
 */
def createLicenseWith = { File license, String startComment, String endComment, String lineComment, boolean isPython ->
    def tmp = File.createTempFile('tmp', '.tmp')
    // This closure is invoked repeatedly during configuration; make sure the generated
    // header files do not pile up in the temp directory once the JVM exits.
    tmp.deleteOnExit()
    tmp.withWriter { w ->
        if (startComment.length() > 0 || !isPython) {
            w.writeLine(startComment)
        }
        license.eachLine { line ->
            w << lineComment
            w.writeLine(line)
        }
        if (endComment.length() > 0 || !isPython) {
            w.writeLine(endComment)
        }
        w.writeLine("")
        if (isPython) {
            w.writeLine("")
        }
    }
    return tmp
}
// Python flavour of the license header: empty start/end/line comment markers, isPython = true.
def createPythonLicenseWith = { licenseFile ->
    createLicenseWith(licenseFile, '', '', '', true)
}
// Java flavour of the license header: wrapped in a block comment, isPython = false.
def createJavaLicenseWith = { licenseFile ->
    createLicenseWith(licenseFile, '/*', ' */', ' * ', false)
}
// The root project needs node in order to apply the prettier formatter repo-wide.
node {
    download = true
    version = '18.18.0'
    npmVersion = '10.1.0'
    // Do not declare the repository.
    distBaseUrl = null
    // With both of these directories set, npm and node end up in separate directories.
    workDir = file("${buildDir}/nodejs")
    npmWorkDir = file("${buildDir}/npm")
}
// The root project needs python in order to apply python formatters like isort or black.
python {
    envPath = '.venv'
    minPythonVersion = '3.10' // should be 3.10 for local development

    // Amazon Linux support.
    // The airbyte-ci tool runs gradle tasks in AL2023-based containers.
    // In AL2023, `python3` is necessarily v3.9, and later pythons need to be installed and named explicitly.
    // See https://github.com/amazonlinux/amazon-linux-2023/issues/459 for details.
    try {
        def python311ExitCode = "python3.11 --version".execute().waitFor()
        if (python311ExitCode == 0) {
            // python3.11 definitely exists at this point, use it instead of 'python3'.
            pythonBinary "python3.11"
        }
    } catch (IOException ignored) {
        // python3.11 is not installed: keep the default binary.
    }

    // Pyenv support.
    try {
        def pyenvRootProcess = "pyenv root".execute()
        def pyenvLatestProcess = "pyenv latest ${minPythonVersion}".execute()
        // Both commands could be started, so pyenv exists: use 'python' instead of 'python3' in all cases.
        pythonBinary "python"
        if (pyenvRootProcess.waitFor() == 0 && pyenvLatestProcess.waitFor() == 0) {
            pythonPath "${pyenvRootProcess.text.trim()}/versions/${pyenvLatestProcess.text.trim()}/bin"
        }
    } catch (IOException ignored) {
        // pyenv is not installed: keep the settings chosen above.
    }

    scope = 'VIRTUALENV'
    installVirtualenv = true

    // black and isort are required for formatting
    pip 'black:22.3.0'
    pip 'isort:5.6.4'
    // poetry is required for installing and running airbyte-ci
    pip 'poetry:1.5.1'
}
// Removes the root project's '.venv' directory with `rm -rf`, and runs as part of the root 'clean' task.
def cleanPythonVenv = rootProject.tasks.register('cleanPythonVenv', Exec) {
    def venvDir = "${rootProject.projectDir.absolutePath}/.venv"
    commandLine 'rm', '-rf', venvDir
}
rootProject.tasks.named('clean').configure { cleanTask ->
    cleanTask.dependsOn cleanPythonVenv
}
// format tasks per project
// For every project this block: builds file trees of formattable files, configures spotless for
// java/gradle/sql/yaml/json, registers python license/isort/black tasks when python files exist,
// and wires everything into the 'format' and 'check' tasks.
allprojects {
// Returns a FileTree rooted at this project's directory that includes `pattern` and leaves out the
// exclusions listed below as well as the directories of this project's direct subprojects.
def createFormatTarget = { pattern ->
// We implement the spotless exclusion rules using a file tree. It seems the excludeTarget option is super finicky
// in a monorepo setup and it doesn't actually exclude directories reliably.
// This code makes the behavior predictable.
ArrayList<String> excludes = [
'.gradle',
'node_modules',
'.eggs',
'.mypy_cache',
'.venv',
'*.egg-info',
'build',
'dbt-project-template',
'dbt-project-template-mssql',
'dbt-project-template-mysql',
'dbt-project-template-oracle',
'dbt-project-template-clickhouse',
'dbt-project-template-snowflake',
'dbt-project-template-tidb',
'dbt-project-template-duckdb',
'dbt_test_config',
'normalization_test_output',
'tools',
'secrets',
'charts', // Helm charts often have injected template strings that will fail general linting. Helm linting is done separately.
'resources/seed/*_catalog.json', // Do not remove - this is also necessary to prevent diffs in our github workflows
'resources/seed/*_registry.json', // Do not remove - this is also necessary to prevent diffs in our github workflows
'resources/seed/specs_secrets_mask.yaml', // Downloaded externally.
'resources/examples/airflow/superset/docker/pythonpath_dev/superset_config.py',
'source-amplitude/unit_tests/api_data/zipped.json', // Zipped file presents as non-UTF-8 making spotless sad
'airbyte-connector-builder-server/connector_builder/generated', // autogenerated code doesn't need to be formatted
'airbyte-ci/connectors/metadata_service/lib/tests/fixtures/**/invalid', // These are deliberately invalid and unformattable.
'__init__.py',
'declarative_component_schema.py',
'source-stock-ticker-api-tutorial/source.py',
'tools/git_hooks/tests/test_spec_linter.py',
'tools/schema_generator/schema_generator/infer_schemas.py',
]
// Ensure that the excludes work when we're already partly in their path.
// Entries starting with "<project name>/" are also added with that prefix stripped.
// Note that every entry is appended a second time by this step; the duplicates are harmless.
excludes.addAll excludes.collectMany {
it.startsWith("${project.name}/") ? [it, it.substring("${project.name}/".length())] : [it]
}
// Prefix everything with double-globs because we only care about path suffixes.
excludes = excludes.collect { "**/${it}" }
// Remove whatever's covered by the project's subprojects, so that each file is targeted by at most one task.
// Only direct children are considered (their parent directory is this project's directory).
// NOTE(review): these entries are absolute paths, unlike the relative "**/" patterns above —
// confirm that fileTree excludes honor them as intended.
def currentProject = project
project.subprojects {
if (it.parent.projectDir == currentProject.projectDir) {
excludes.add "${it.projectDir}/**"
}
}
// Build the FileTree.
return fileTree(dir: projectDir, include: pattern, exclude: excludes)
}
// Apply spotless formatting.
// Each format is only configured when its target file tree is non-empty.
apply plugin: 'com.diffplug.spotless'
spotless {
def javaTarget = createFormatTarget('**/*.java')
if (!javaTarget.isEmpty()) {
java {
target javaTarget
importOrder()
eclipse('4.21').configFile(rootProject.file('tools/gradle/codestyle/java-google-style.xml'))
licenseHeaderFile createJavaLicenseWith(rootProject.file('LICENSE_SHORT'))
removeUnusedImports()
trimTrailingWhitespace()
}
}
def groovyGradleTarget = createFormatTarget('**/*.gradle')
if (!groovyGradleTarget.isEmpty()) {
groovyGradle {
target groovyGradleTarget
}
}
def sqlTarget = createFormatTarget('**/*.sql')
if (!sqlTarget.isEmpty()) {
sql {
target sqlTarget
dbeaver().configFile(rootProject.file('tools/gradle/codestyle/sql-dbeaver.properties'))
}
}
def stylingTarget = createFormatTarget(['**/*.yaml', '**/*.yml', '**/*.json'])
if (!stylingTarget.isEmpty()) {
format 'styling', {
target stylingTarget
// prettier is run with the node and npm executables set up by the root project's node plugin tasks.
def npmPath = "${rootProject.tasks.named('npmSetup').get().npmDir.get()}/bin/npm"
def nodePath = "${rootProject.tasks.named('nodeSetup').get().nodeDir.get()}/bin/node"
prettier().npmExecutable(npmPath).nodeExecutable(nodePath)
}
}
}
// `=~` performs a find (substring) match, so the patterns below apply to any task whose name contains a match.
// Spotless tasks run after the project's 'generate' task, if there is one.
tasks.matching { it.name =~ /spotless.*/ }.configureEach {
dependsOn tasks.matching { it.name == 'generate' }
}
// The 'styling' format needs the root project's node and npm installations.
tasks.matching { it.name =~ /spotlessStyling.*/ }.configureEach {
dependsOn rootProject.tasks.named('nodeSetup')
dependsOn rootProject.tasks.named('npmSetup')
}
// TODO: remove this once the CI routinely enforces formatting checks.
tasks.matching { it.name =~ /spotless.*Check/ }.configureEach {
enabled = false
}
// python license header generation is part of 'format'
def pythonTarget = createFormatTarget('**/*.py')
if (!project.name.endsWith('source-stock-ticker-api-tutorial') && !pythonTarget.isEmpty()) {
apply plugin: 'com.github.hierynomus.license'
license {
header rootProject.file("LICENSE_SHORT")
}
// Disable auto-generated java tasks, we rely on spotless there.
// NOTE(review): because `=~` is a find match, /license.*/ also matches the names 'licensePythonCheck' and
// 'licensePythonFormat' registered below — confirm that those two tasks still end up enabled.
tasks.matching { it.name =~ /license.*/ }.configureEach {
enabled = false
}
def licensePythonCheck = tasks.register('licensePythonCheck', LicenseCheck) {
header = createPythonLicenseWith(rootProject.file('LICENSE_SHORT'))
source = pythonTarget
strictCheck = true
}
licensePythonCheck.configure {
dependsOn tasks.matching { it.name == 'generate' }
}
def licensePythonFormat = tasks.register('licensePythonFormat', LicenseFormat) {
header = createPythonLicenseWith(rootProject.file('LICENSE_SHORT'))
source = pythonTarget
strictCheck = true
}
licensePythonFormat.configure {
dependsOn tasks.matching { it.name == 'generate' }
}
// Registers an Exec task that runs `<root>/.venv/bin/python -m <moduleName>` from this project's directory.
// The root pyproject.toml is passed via `settingsFileFlagName`, followed by `extraArgs`
// and the relative path of every file in `pythonTarget`. Returns the registered task provider.
def registerPythonFormatTask = {
String taskName, String moduleName, String settingsFileFlagName, String... extraArgs ->
def task = tasks.register(taskName, Exec) {
workingDir projectDir
commandLine rootProject.file('.venv/bin/python')
args "-m", moduleName, settingsFileFlagName, rootProject.file('pyproject.toml').absolutePath
args extraArgs
pythonTarget.getFiles().forEach {
args projectDir.relativePath(it)
}
}
task.configure {
dependsOn rootProject.tasks.named('pipInstall')
dependsOn tasks.matching { it.name == 'generate' }
}
return task
}
// Ordering within each family: license first, then isort, then black.
def isortPythonCheck = registerPythonFormatTask('isortPythonCheck', 'isort', '--settings-file', '--check-only')
isortPythonCheck.configure {
dependsOn licensePythonCheck
}
def isortPythonFormat = registerPythonFormatTask('isortPythonFormat', 'isort', '--settings-file')
isortPythonFormat.configure {
dependsOn licensePythonFormat
}
def blackPythonCheck = registerPythonFormatTask('blackPythonCheck', 'black', '--config', '--check')
blackPythonCheck.configure {
dependsOn licensePythonCheck
dependsOn isortPythonCheck
}
def blackPythonFormat = registerPythonFormatTask('blackPythonFormat', 'black', '--config')
blackPythonFormat.configure {
dependsOn licensePythonFormat
dependsOn isortPythonFormat
}
}
// 'format' aggregates 'generate', the spotless apply tasks and the python format tasks of this project.
tasks.register('format') {
dependsOn tasks.matching { it.name == 'generate' }
dependsOn tasks.matching { it.name =~ /spotless.*Apply/ }
dependsOn tasks.matching { it.name =~ /.*PythonFormat/ }
}
// 'check' additionally runs the python check tasks when they were registered for this project.
tasks.named('check').configure {
dependsOn tasks.matching { it.name == 'licensePythonCheck' }
dependsOn tasks.matching { it.name == 'isortPythonCheck' }
dependsOn tasks.matching { it.name == 'blackPythonCheck' }
}
}
// Returns the 'version' entry of the CDK's version.properties resource file.
def getCDKTargetVersion() {
    def versionProps = new Properties()
    def versionFile = file("airbyte-cdk/java/airbyte-cdk/src/main/resources/version.properties")
    versionFile.withInputStream { stream -> versionProps.load(stream) }
    return versionProps.getProperty('version')
}
// Returns the most recent lastModified() value among the regular files in `files`,
// or null when `files` is empty or holds no regular file.
static def getLatestFileModifiedTimeFromFiles(files) {
    if (files.isEmpty()) {
        return null
    }
    def newest = null
    for (candidate in files) {
        if (!candidate.isFile()) {
            continue
        }
        def modified = candidate.lastModified()
        if (newest == null || modified > newest) {
            newest = modified
        }
    }
    return newest
}
// Verifies that the locally published CDK snapshot JAR matches what a connector requires.
//
// requiredSnapshotVersion - the CDK snapshot version the connector depends on, or null when the
//                           connector does not depend on a CDK snapshot (nothing is checked then).
//
// Throws GradleException when the CDK properties file declares a different version than required,
// or when the JAR in the local maven repository is older than the CDK sources.
// A missing JAR only produces a warning.
def checkCDKJarExists(requiredSnapshotVersion) {
    if (requiredSnapshotVersion == null) {
        // Connector does not require CDK snapshot.
        return
    }
    final boolean checkFileChanges = true
    final cdkTargetVersion = getCDKTargetVersion()
    if (requiredSnapshotVersion != cdkTargetVersion) {
        // Null-safe: a properties file without a 'version' entry is reported as "not publishing snapshot".
        if (!cdkTargetVersion?.contains("-SNAPSHOT")) {
            throw new GradleException(
                "CDK JAR version is not publishing snapshot but connector requires version ${requiredSnapshotVersion}.\n" +
                "Please check that the version in the CDK properties file matches the connector build.gradle."
            )
        }
        throw new GradleException(
            "CDK JAR version ${cdkTargetVersion} does not match connector's required version ${requiredSnapshotVersion}.\n" +
            "Please check that the version in the CDK properties file matches the connector build.gradle."
        )
    }
    def cdkJar = file("${System.properties['user.home']}/.m2/repository/io/airbyte/airbyte-cdk/${cdkTargetVersion}/airbyte-cdk-${cdkTargetVersion}.jar")
    if (!cdkJar.exists()) {
        println("WARNING: CDK JAR does not exist at ${cdkJar.path}.\nPlease run './gradlew :airbyte-cdk:java:airbyte-cdk:build'.")
        // There is no JAR to compare timestamps against; a missing JAR stays a warning.
        return
    }
    if (checkFileChanges) {
        // Walk the CDK source tree recursively: the sources live in nested directories, so looking only
        // at the direct children of 'src' finds no regular files and the check would never trigger.
        def cdkSourceFiles = fileTree("${rootDir}/airbyte-cdk/java/airbyte-cdk/src").files
        def latestJavaFileTimestamp = getLatestFileModifiedTimeFromFiles(cdkSourceFiles)
        if (latestJavaFileTimestamp != null && cdkJar.lastModified() < latestJavaFileTimestamp) {
            throw new GradleException("CDK JAR is out of date. Please run './gradlew :airbyte-cdk:java:airbyte-cdk:build'.")
        }
    }
}
// Returns the requested version of the first 'io.airbyte:airbyte-cdk' module dependency whose
// version ends with '-SNAPSHOT', or null when `dependenciesList` holds no such dependency.
static def getCDKSnapshotRequirement(dependenciesList) {
    def isCdkSnapshot = { dependency ->
        def requested = dependency.requested
        return requested instanceof ModuleComponentSelector &&
            requested.group == 'io.airbyte' &&
            requested.module == 'airbyte-cdk' &&
            requested.version.endsWith('-SNAPSHOT')
    }
    def match = dependenciesList.find(isCdkSnapshot)
    return match?.requested?.version
}
// Common configurations for 'assemble'.
allprojects {
    tasks.withType(Tar).configureEach { tarTask ->
        tarTask.duplicatesStrategy = DuplicatesStrategy.INCLUDE
    }
    tasks.withType(Zip).configureEach { zipTask ->
        zipTask.duplicatesStrategy = DuplicatesStrategy.INCLUDE
        // Disabling distZip causes the build to break for some reason, so: instead of disabling it, make it fast.
        zipTask.entryCompression = ZipEntryCompression.STORED
    }
}
// Java projects common configurations.
// Applies to every subproject that has a 'src/main/java' directory: java 17, jacoco, spotbugs,
// test settings, shared dependencies and the CDK snapshot sanity check for connectors.
subprojects { subproj ->
// Skip subprojects without java sources.
if (!subproj.file('src/main/java').directory) {
return
}
apply plugin: 'java'
apply plugin: 'jacoco'
apply plugin: 'com.github.spotbugs'
java {
sourceCompatibility = JavaVersion.VERSION_17
targetCompatibility = JavaVersion.VERSION_17
}
if (isConnectorProject(subproj)) {
// This is a Java connector project.
// Evaluate CDK project before evaluating the connector.
evaluationDependsOn(':airbyte-cdk:java:airbyte-cdk')
// Only run the check when no requested task name contains ':airbyte-cdk:' and at least one
// requested task name contains ':source-' or ':destination-'.
if (!gradle.startParameter.taskNames.any { it.contains(':airbyte-cdk:') } &&
gradle.startParameter.taskNames.any { it.contains(':source-') || it.contains(':destination-') }) {
// We are building a connector. Warn if the CDK JAR is missing or out of date.
// NOTE(review): `cdkRelativePath` is not referenced anywhere in this block.
final String cdkRelativePath = 'airbyte-cdk/java/airbyte-cdk'
afterEvaluate {
def cdkVersionNeeded = getCDKSnapshotRequirement(configurations.compileClasspath.incoming.resolutionResult.allDependencies)
checkCDKJarExists(cdkVersionNeeded)
}
}
}
jacoco {
toolVersion = "0.8.8"
}
jacocoTestReport {
reports {
html.required = true
xml.required = true
csv.required = false
}
}
// The coverage report is produced from the results of the 'test' task.
def jacocoTestReportTask = tasks.named('jacocoTestReport')
jacocoTestReportTask.configure {
dependsOn tasks.named('test')
}
jacocoTestCoverageVerification {
violationRules {
// Coverage violations are reported but do not fail the build.
failOnViolation = false
rule {
element = 'CLASS'
excludes = ['**/*Test*', '**/generated*']
limit {
counter = 'BRANCH'
minimum = 0.8
}
limit {
counter = 'INSTRUCTION'
minimum = 0.8
}
}
}
}
spotbugs {
ignoreFailures = false
effort = 'max'
excludeFilter.set rootProject.file('spotbugs-exclude-filter-file.xml')
reportLevel = 'high'
showProgress = false
toolVersion = '4.7.3'
// Connectors get the cheapest analysis when slow tests are being skipped.
if (rootProject.ext.skipSlowTests && isConnectorProject(subproj)) {
effort = 'min'
}
}
test {
maxHeapSize = '3g'
maxParallelForks = Runtime.runtime.availableProcessors()
// This allows to set up a `gradle.properties` file inside the connector folder to reduce parallelization.
// This is especially useful for connectors that share resources, like Redshift or Snowflake.
// This limits the number of test classes that execute in parallel.
// See also usages of parallelExecutionsPerThread.
if (subproj.hasProperty('numberThreads')) {
// A non-integer or non-positive value leaves maxParallelForks unchanged.
int numberThreads = 0
String numberThreadsString = subproj.property('numberThreads').toString()
if (numberThreadsString.isInteger()) {
numberThreads = numberThreadsString as int
}
// The property can only lower the fork count, never raise it.
if (numberThreads > 0 && numberThreads < maxParallelForks) {
maxParallelForks = numberThreads
}
}
systemProperty 'user.timezone', 'UTC'
if (rootProject.ext.skipSlowTests) {
// Exclude all connector unit tests
exclude '**/io/airbyte/integrations/source/**'
exclude '**/io/airbyte/integrations/destination/**'
}
jacoco {
enabled = !rootProject.ext.skipSlowTests
excludes = ['**/*Test*', '**/generated*']
}
useJUnitPlatform {
excludeTags('cloud-storage')
}
testLogging() {
events "passed", "skipped", "failed"
exceptionFormat 'full'
// uncomment to get the full log output
// showStandardStreams = true
}
finalizedBy jacocoTestReportTask
}
// TODO: These should be added to the CDK or to the individual projects that need them:
dependencies {
implementation(platform("com.fasterxml.jackson:jackson-bom:2.13.0"))
implementation(platform("org.glassfish.jersey:jersey-bom:2.31"))
// version is handled by the jackson-bom platform declared above, so we do not explicitly set it here.
implementation libs.bundles.jackson
implementation libs.guava
implementation libs.commons.io
implementation libs.bundles.apache
implementation libs.slf4j.api
// SLF4J as a facade over Log4j2 required dependencies
implementation libs.bundles.log4j
// Bridges from other logging implementations to SLF4J
implementation libs.bundles.slf4j
// Dependencies for logging to cloud storage, as well as the various clients used to do so.
implementation libs.appender.log4j2
implementation libs.aws.java.sdk.s3
implementation libs.google.cloud.storage
implementation libs.s3
// Lombok dependencies
compileOnly libs.lombok
annotationProcessor libs.lombok
testCompileOnly libs.lombok
testAnnotationProcessor libs.lombok
testRuntimeOnly libs.junit.jupiter.engine
testImplementation libs.bundles.junit
testImplementation libs.assertj.core
testImplementation libs.junit.pioneer
// adds owasp plugin
spotbugsPlugins libs.findsecbugs.plugin
implementation libs.spotbugs.annotations
}
tasks.withType(SpotBugsTask).configureEach {
// Reports can be found under each subproject in build/spotbugs/
reports {
xml.required = false
html.required = true
}
}
javadoc.options.addStringOption('Xdoclint:none', '-quiet')
// Coverage verification is part of 'check'.
tasks.named('check').configure {
dependsOn tasks.named('jacocoTestCoverageVerification')
}
}
// integration and performance test tasks per project
// Each aggregate task depends on whichever of the listed tasks exist in the project.
allprojects {
    def integrationTestTaskNames = [
        'integrationTestJava',
        'integrationTestPython',
        'standardSourceTestFile',
    ]
    def performanceTestTaskNames = [
        'performanceTestJava',
    ]
    tasks.register('integrationTest') {
        dependsOn tasks.matching { it.name in integrationTestTaskNames }
    }
    tasks.register('performanceTest') {
        dependsOn tasks.matching { it.name in performanceTestTaskNames }
    }
}
// convenience task to list all dependencies per project
subprojects {
    tasks.register('listAllDependencies', DependencyReportTask)
}
// airbyte-ci tasks for local development
// Runs `python -m poetry install --no-cache` in the pipelines directory with the root virtualenv's python,
// once 'pipInstall' has run.
def poetryInstallAirbyteCI = tasks.register('poetryInstallAirbyteCI', Exec) {
    dependsOn tasks.named('pipInstall')
    workingDir rootProject.file('airbyte-ci/connectors/pipelines')
    commandLine rootProject.file('.venv/bin/python').absolutePath
    args "-m", "poetry", "install", "--no-cache"
}
// Runs `python -m poetry env remove --all` in the pipelines directory.
// Skipped when the root virtualenv's python does not exist; runs before 'cleanPythonVenv'.
def poetryCleanVirtualenv = tasks.register('cleanVirtualenv', Exec) {
    def venvPython = rootProject.file('.venv/bin/python')
    workingDir rootProject.file('airbyte-ci/connectors/pipelines')
    commandLine venvPython.absolutePath
    args "-m", "poetry", "env", "remove", "--all"
    onlyIf { venvPython.exists() }
}
cleanPythonVenv.configure { venvCleanTask ->
    venvCleanTask.dependsOn poetryCleanVirtualenv
}
// airbyte-ci tasks for connector projects only.
subprojects {
    if (!isConnectorProject(project)) {
        return
    }
    def pipelinesDir = rootProject.file('airbyte-ci/connectors/pipelines')
    // Registers an Exec task running `airbyte-ci connectors --name=<project> <connectorsArgs...>` through poetry,
    // using the root virtualenv's python. Returns the registered task provider.
    def airbyteCIConnectorsTask = { String taskName, String... connectorsArgs ->
        return tasks.register(taskName, Exec) {
            dependsOn poetryInstallAirbyteCI
            workingDir rootDir
            environment "CI", "1" // set to use more suitable logging format
            commandLine rootProject.file('.venv/bin/python').absolutePath
            args "-m", "poetry", "--directory", pipelinesDir.absolutePath, "run"
            args "airbyte-ci", "connectors", "--name=${project.name}"
            args connectorsArgs
            // Forbid these kinds of tasks from running concurrently.
            // We can induce serial execution by giving them all a common output directory.
            outputs.dir rootProject.file("${rootProject.buildDir}/airbyte-ci-lock")
            // Never consider the task up to date.
            outputs.upToDateWhen { false }
        }
    }
    // Build connector image as part of 'assemble' task.
    // This is required for local 'integrationTest' execution.
    def buildConnectorImage = airbyteCIConnectorsTask(
        'buildConnectorImage', '--disable-report-auto-open', 'build', '--use-host-gradle-dist-tar')
    buildConnectorImage.configure { imageTask ->
        // Images for java projects always rely on the distribution tarball.
        imageTask.dependsOn tasks.matching { it.name == 'distTar' }
        // Ensure that all files exist beforehand.
        imageTask.dependsOn tasks.matching { it.name == 'generate' }
    }
    // We may revisit the dependency on assemble but the dependency should always be on a base task.
    tasks.named('assemble').configure { assembleTask ->
        assembleTask.dependsOn buildConnectorImage
    }
    // Convenience tasks for local airbyte-ci execution.
    airbyteCIConnectorsTask('airbyteCIConnectorBuild', 'build')
    airbyteCIConnectorsTask('airbyteCIConnectorTest', 'test')
}
// produce reproducible archives
// (see https://docs.gradle.org/current/userguide/working_with_files.html#sec:reproducible_archives)
tasks.withType(AbstractArchiveTask).configureEach { archiveTask ->
    archiveTask.reproducibleFileOrder = true
    archiveTask.preserveFileTimestamps = false
}
// pin dependency versions according to ./deps.toml
catalog {
    versionCatalog {
        from files("deps.toml")
    }
}