feat(ns-files): add a metadata layer on top for better performance & versioned ns files

part of https://github.com/kestra-io/kestra/issues/5617
This commit is contained in:
brian.mulier
2025-10-31 16:59:31 +01:00
committed by brian-mulier-p
parent d20f7039c7
commit 682d258e7b
57 changed files with 2369 additions and 534 deletions

View File

@@ -10,7 +10,8 @@ import picocli.CommandLine;
description = "populate metadata for entities",
subcommands = {
KvMetadataMigrationCommand.class,
SecretsMetadataMigrationCommand.class
SecretsMetadataMigrationCommand.class,
NsFilesMetadataMigrationCommand.class
}
)
@Slf4j

View File

@@ -1,8 +1,10 @@
package io.kestra.cli.commands.migrations.metadata;
import io.kestra.core.models.kv.PersistedKvMetadata;
import io.kestra.core.models.namespaces.files.NamespaceFileMetadata;
import io.kestra.core.repositories.FlowRepositoryInterface;
import io.kestra.core.repositories.KvMetadataRepositoryInterface;
import io.kestra.core.repositories.NamespaceFileMetadataRepositoryInterface;
import io.kestra.core.storages.FileAttributes;
import io.kestra.core.storages.StorageContext;
import io.kestra.core.storages.StorageInterface;
@@ -15,11 +17,13 @@ import jakarta.inject.Singleton;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;
import java.nio.file.Path;
import java.time.Instant;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.Function;
import java.util.stream.Collectors;
import static io.kestra.core.utils.Rethrow.throwConsumer;
@@ -36,6 +40,9 @@ public class MetadataMigrationService {
@Inject
private KvMetadataRepositoryInterface kvMetadataRepository;
@Inject
private NamespaceFileMetadataRepositoryInterface namespaceFileMetadataRepository;
@Inject
private StorageInterface storageInterface;
@@ -49,7 +56,9 @@ public class MetadataMigrationService {
.flatMap(namespacesForTenant -> namespacesForTenant.getValue().stream().map(namespace -> Map.entry(namespacesForTenant.getKey(), namespace)))
.flatMap(throwFunction(namespaceForTenant -> {
InternalKVStore kvStore = new InternalKVStore(namespaceForTenant.getKey(), namespaceForTenant.getValue(), storageInterface, kvMetadataRepository);
List<FileAttributes> list = listAllFromStorage(storageInterface, namespaceForTenant.getKey(), namespaceForTenant.getValue());
List<FileAttributes> list = listAllFromStorage(storageInterface, StorageContext::kvPrefix, namespaceForTenant.getKey(), namespaceForTenant.getValue()).stream()
.map(PathAndAttributes::attributes)
.toList();
Map<Boolean, List<KVEntry>> entriesByIsExpired = list.stream()
.map(throwFunction(fileAttributes -> KVEntry.from(namespaceForTenant.getValue(), fileAttributes)))
.collect(Collectors.partitioningBy(kvEntry -> Optional.ofNullable(kvEntry.expirationDate()).map(expirationDate -> Instant.now().isAfter(expirationDate)).orElse(false)));
@@ -75,15 +84,35 @@ public class MetadataMigrationService {
}));
}
public void nsFilesMigration() throws IOException {
this.namespacesPerTenant().entrySet().stream()
.flatMap(namespacesForTenant -> namespacesForTenant.getValue().stream().map(namespace -> Map.entry(namespacesForTenant.getKey(), namespace)))
.flatMap(throwFunction(namespaceForTenant -> {
List<PathAndAttributes> list = listAllFromStorage(storageInterface, StorageContext::namespaceFilePrefix, namespaceForTenant.getKey(), namespaceForTenant.getValue());
return list.stream()
.map(pathAndAttributes -> NamespaceFileMetadata.of(namespaceForTenant.getKey(), namespaceForTenant.getValue(), pathAndAttributes.path(), pathAndAttributes.attributes()));
}))
.forEach(throwConsumer(nsFileMetadata -> {
if (namespaceFileMetadataRepository.findByPath(nsFileMetadata.getTenantId(), nsFileMetadata.getNamespace(), nsFileMetadata.getPath()).isEmpty()) {
namespaceFileMetadataRepository.save(nsFileMetadata);
}
}));
}
public void secretMigration() throws Exception {
throw new UnsupportedOperationException("Secret migration is not needed in the OSS version");
}
private static List<FileAttributes> listAllFromStorage(StorageInterface storage, String tenant, String namespace) throws IOException {
private static List<PathAndAttributes> listAllFromStorage(StorageInterface storage, Function<String, String> prefixFunction, String tenant, String namespace) throws IOException {
try {
return storage.list(tenant, namespace, URI.create(StorageContext.KESTRA_PROTOCOL + StorageContext.kvPrefix(namespace)));
String prefix = prefixFunction.apply(namespace);
return storage.allByPrefix(tenant, namespace, URI.create(StorageContext.KESTRA_PROTOCOL + prefix + "/"), true).stream()
.map(throwFunction(uri -> new PathAndAttributes(uri.getPath().substring(prefix.length()), storage.getAttributes(tenant, namespace, uri))))
.toList();
} catch (FileNotFoundException e) {
return Collections.emptyList();
}
}
public record PathAndAttributes(String path, FileAttributes attributes) {}
}

View File

@@ -0,0 +1,31 @@
package io.kestra.cli.commands.migrations.metadata;
import io.kestra.cli.AbstractCommand;
import jakarta.inject.Inject;
import jakarta.inject.Provider;
import lombok.extern.slf4j.Slf4j;
import picocli.CommandLine;
@CommandLine.Command(
name = "nsfiles",
description = "populate metadata for Namespace Files"
)
@Slf4j
public class NsFilesMetadataMigrationCommand extends AbstractCommand {
@Inject
private Provider<MetadataMigrationService> metadataMigrationServiceProvider;
@Override
public Integer call() throws Exception {
super.call();
try {
metadataMigrationServiceProvider.get().nsFilesMigration();
} catch (Exception e) {
System.err.println("❌ Namespace Files Metadata migration failed: " + e.getMessage());
e.printStackTrace();
return 1;
}
System.out.println("✅ Namespace Files Metadata migration complete.");
return 0;
}
}

View File

@@ -0,0 +1,145 @@
package io.kestra.cli.commands.migrations.metadata;
import io.kestra.cli.App;
import io.kestra.core.exceptions.ResourceExpiredException;
import io.kestra.core.models.flows.Flow;
import io.kestra.core.models.flows.GenericFlow;
import io.kestra.core.models.kv.PersistedKvMetadata;
import io.kestra.core.models.namespaces.files.NamespaceFileMetadata;
import io.kestra.core.repositories.FlowRepositoryInterface;
import io.kestra.core.repositories.KvMetadataRepositoryInterface;
import io.kestra.core.repositories.NamespaceFileMetadataRepositoryInterface;
import io.kestra.core.serializers.JacksonMapper;
import io.kestra.core.storages.*;
import io.kestra.core.storages.kv.*;
import io.kestra.core.tenant.TenantService;
import io.kestra.core.utils.TestsUtils;
import io.kestra.plugin.core.log.Log;
import io.micronaut.configuration.picocli.PicocliRunner;
import io.micronaut.context.ApplicationContext;
import io.micronaut.context.env.Environment;
import io.micronaut.core.annotation.NonNull;
import org.junit.jupiter.api.Test;
import java.io.*;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.time.Duration;
import java.time.Instant;
import java.util.List;
import java.util.Optional;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
public class NsFilesMetadataMigrationCommandTest {
@Test
void run() throws IOException {
ByteArrayOutputStream out = new ByteArrayOutputStream();
System.setOut(new PrintStream(out));
ByteArrayOutputStream err = new ByteArrayOutputStream();
System.setErr(new PrintStream(err));
try (ApplicationContext ctx = ApplicationContext.run(Environment.CLI, Environment.TEST)) {
/* Initial setup:
* - namespace 1: my/path, value
* - namespace 1: another/path
* - namespace 2: yet/another/path
* - Nothing in database */
String namespace = TestsUtils.randomNamespace();
String path = "/my/path";
StorageInterface storage = ctx.getBean(StorageInterface.class);
String value = "someValue";
putOldNsFile(storage, namespace, path, value);
String anotherPath = "/another/path";
String anotherValue = "anotherValue";
putOldNsFile(storage, namespace, anotherPath, anotherValue);
String anotherNamespace = TestsUtils.randomNamespace();
String yetAnotherPath = "/yet/another/path";
String yetAnotherValue = "yetAnotherValue";
putOldNsFile(storage, anotherNamespace, yetAnotherPath, yetAnotherValue);
NamespaceFileMetadataRepositoryInterface namespaceFileMetadataRepository = ctx.getBean(NamespaceFileMetadataRepositoryInterface.class);
String tenantId = TenantService.MAIN_TENANT;
assertThat(namespaceFileMetadataRepository.findByPath(tenantId, namespace, path).isPresent()).isFalse();
/* Expected outcome from the migration command:
* - no namespace files has been migrated because no flow exist in the namespace so they are not picked up because we don't know they exist */
String[] nsFilesMetadataMigrationCommand = {
"migrate", "metadata", "nsfiles"
};
PicocliRunner.call(App.class, ctx, nsFilesMetadataMigrationCommand);
assertThat(out.toString()).contains("✅ Namespace Files Metadata migration complete.");
// Still it's not in the metadata repository because no flow exist to find that namespace file
assertThat(namespaceFileMetadataRepository.findByPath(tenantId, namespace, path).isPresent()).isFalse();
assertThat(namespaceFileMetadataRepository.findByPath(tenantId, namespace, anotherPath).isPresent()).isFalse();
assertThat(namespaceFileMetadataRepository.findByPath(tenantId, anotherNamespace, yetAnotherPath).isPresent()).isFalse();
// A flow is created from namespace 1, so the namespace files in this namespace should be migrated
FlowRepositoryInterface flowRepository = ctx.getBean(FlowRepositoryInterface.class);
flowRepository.create(GenericFlow.of(Flow.builder()
.tenantId(tenantId)
.id("a-flow")
.namespace(namespace)
.tasks(List.of(Log.builder().id("log").type(Log.class.getName()).message("logging").build()))
.build()));
/* We run the migration again:
* - namespace 1 my/path file is seen and metadata is migrated to database
* - namespace 1 another/path file is seen and metadata is migrated to database
* - namespace 2 yet/another/path is not seen because no flow exist in this namespace */
out.reset();
PicocliRunner.call(App.class, ctx, nsFilesMetadataMigrationCommand);
assertThat(out.toString()).contains("✅ Namespace Files Metadata migration complete.");
Optional<NamespaceFileMetadata> foundNsFile = namespaceFileMetadataRepository.findByPath(tenantId, namespace, path);
assertThat(foundNsFile.isPresent()).isTrue();
assertThat(foundNsFile.get().getVersion()).isEqualTo(1);
assertThat(foundNsFile.get().getSize()).isEqualTo(value.length());
Optional<NamespaceFileMetadata> anotherFoundNsFile = namespaceFileMetadataRepository.findByPath(tenantId, namespace, anotherPath);
assertThat(anotherFoundNsFile.isPresent()).isTrue();
assertThat(anotherFoundNsFile.get().getVersion()).isEqualTo(1);
assertThat(anotherFoundNsFile.get().getSize()).isEqualTo(anotherValue.length());
NamespaceFactory namespaceFactory = ctx.getBean(NamespaceFactory.class);
Namespace namespaceStorage = namespaceFactory.of(tenantId, namespace, storage);
FileAttributes nsFileRawMetadata = namespaceStorage.getFileMetadata(Path.of(path));
assertThat(nsFileRawMetadata.getSize()).isEqualTo(value.length());
assertThat(new String(namespaceStorage.getFileContent(Path.of(path)).readAllBytes())).isEqualTo(value);
FileAttributes anotherNsFileRawMetadata = namespaceStorage.getFileMetadata(Path.of(anotherPath));
assertThat(anotherNsFileRawMetadata.getSize()).isEqualTo(anotherValue.length());
assertThat(new String(namespaceStorage.getFileContent(Path.of(anotherPath)).readAllBytes())).isEqualTo(anotherValue);
assertThat(namespaceFileMetadataRepository.findByPath(tenantId, anotherNamespace, yetAnotherPath).isPresent()).isFalse();
assertThatThrownBy(() -> namespaceStorage.getFileMetadata(Path.of(yetAnotherPath))).isInstanceOf(FileNotFoundException.class);
/* We run one last time the migration without any change to verify that we don't resave an existing metadata.
* It covers the case where user didn't perform the migrate command yet but they played and added some KV from the UI (so those ones will already be in metadata database). */
out.reset();
PicocliRunner.call(App.class, ctx, nsFilesMetadataMigrationCommand);
assertThat(out.toString()).contains("✅ Namespace Files Metadata migration complete.");
foundNsFile = namespaceFileMetadataRepository.findByPath(tenantId, namespace, path);
assertThat(foundNsFile.get().getVersion()).isEqualTo(1);
}
}
private static void putOldNsFile(StorageInterface storage, String namespace, String path, String value) throws IOException {
URI nsFileStorageUri = getNsFileStorageUri(namespace, path);
storage.put(TenantService.MAIN_TENANT, namespace, nsFileStorageUri, new StorageObject(
null,
new ByteArrayInputStream(value.getBytes(StandardCharsets.UTF_8))
));
}
private static @NonNull URI getNsFileStorageUri(String namespace, String path) {
return URI.create(StorageContext.KESTRA_PROTOCOL + StorageContext.namespaceFilePrefix(namespace) + path);
}
}