feat(registry): add remove latest partition job (#44795)
## What Provides a job to delete all latest partitions. Why? If you remove the partitions they will be readded **triggering a reprocess of latest** ## Review guide <!-- 1. `x.py` 2. `y.py` --> ## User Impact <!-- * What is the end result perceived by the user? * If there are negative side effects, please list them. --> ## Can this PR be safely reverted and rolled back? <!-- * If unsure, leave it blank. --> - [ ] YES 💚 - [ ] NO ❌
This commit is contained in:
@@ -25,6 +25,7 @@ from orchestrator.jobs.metadata import generate_stale_gcs_latest_metadata_file
|
||||
from orchestrator.jobs.registry import (
|
||||
add_new_metadata_partitions,
|
||||
remove_stale_metadata_partitions,
|
||||
remove_latest_metadata_partitions,
|
||||
generate_cloud_registry,
|
||||
generate_oss_registry,
|
||||
generate_registry_entry,
|
||||
@@ -203,6 +204,7 @@ JOBS = [
|
||||
generate_nightly_reports,
|
||||
add_new_metadata_partitions,
|
||||
remove_stale_metadata_partitions,
|
||||
remove_latest_metadata_partitions,
|
||||
generate_stale_gcs_latest_metadata_file,
|
||||
]
|
||||
|
||||
|
||||
@@ -52,6 +52,35 @@ def remove_stale_metadata_partitions():
|
||||
remove_stale_metadata_partitions_op()
|
||||
|
||||
|
||||
@op(required_resource_keys={"latest_metadata_file_blobs"})
|
||||
def remove_latest_metadata_partitions_op(context):
|
||||
"""
|
||||
This op is responsible for removing for latest metadata files. (Generally used to reprocess metadata files).
|
||||
"""
|
||||
latest_metadata_file_blobs = context.resources.latest_metadata_file_blobs
|
||||
partition_name = registry_entry.metadata_partitions_def.name
|
||||
|
||||
all_latest = [blob.etag for blob in latest_metadata_file_blobs]
|
||||
context.log.info(f"Found {len(all_latest)} latest metadata files found in GCS bucket")
|
||||
|
||||
all_etag_partitions = context.instance.get_dynamic_partitions(partition_name)
|
||||
context.log.info(f"Found {len(all_etag_partitions)} existing metadata partitions")
|
||||
|
||||
for latest_etag in all_latest:
|
||||
if latest_etag in all_etag_partitions:
|
||||
context.log.info(f"Removing latest etag: {latest_etag}")
|
||||
context.instance.delete_dynamic_partition(partition_name, latest_etag)
|
||||
context.log.info(f"Removed latest etag: {latest_etag}")
|
||||
|
||||
|
||||
@job(tags={"dagster/priority": HIGH_QUEUE_PRIORITY})
|
||||
def remove_latest_metadata_partitions():
|
||||
"""
|
||||
This job is responsible for removing latest metadata partitions. (Generally used to reprocess metadata files).
|
||||
"""
|
||||
remove_latest_metadata_partitions_op()
|
||||
|
||||
|
||||
@op(required_resource_keys={"slack", "all_metadata_file_blobs"})
|
||||
def add_new_metadata_partitions_op(context):
|
||||
"""
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[tool.poetry]
|
||||
name = "orchestrator"
|
||||
version = "0.4.0"
|
||||
version = "0.4.1"
|
||||
description = ""
|
||||
authors = ["Ben Church <ben@airbyte.io>"]
|
||||
readme = "README.md"
|
||||
|
||||
Reference in New Issue
Block a user