1
0
mirror of synced 2025-12-26 14:02:10 -05:00
Files
airbyte/tools/bin/identify-dormant-workflows.py
2023-08-25 10:20:41 -07:00

114 lines
3.7 KiB
Python

#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#
import argparse
import json
import os
import re
import subprocess
from datetime import datetime, timedelta
from github import Github
from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError
# Age threshold, in days. main() compares a workflow run's `updated_at` against
# "now minus this many days" to decide whether the workflow is reported.
DAYS_TO_KEEP_ORPHANED_JOBS = 90
# Slack channel passed to chat_postMessage when a Slack token is available.
SLACK_CHANNEL_FOR_NOTIFICATIONS = "infra-alerts"
"""
This script is intended to be run in conjunction with cleanup-workflow-runs.py to keep GH actions clean.
The basic workflow is
identify-dormant-workflows.py notifies of dormant workflows (workflows that have no runs newer than DAYS_TO_KEEP_ORPHANED_JOBS days) daily ->
manually notifies infra team via slack ->
infra team checks with stakeholders to ensure dormant jobs can be deleted and then cleans up workflow runs manually ->
cleanup-workflow-runs.py deletes old workflow runs (again older than DAYS_TO_KEEP_ORPHANED_JOBS) that have no associated workflow
We need to clean up the runs because even if a workflow is deleted, the runs linger in the UI.
We don't want to delete workflow runs newer than 90 days on GH actions, even if the workflow doesn't exist.
It's possible that people might test things off the master branch, and we don't want to delete their recent runs.
"""
# Command-line interface for the script. Both options are optional on the
# command line; main() falls back to environment variables when they are absent.
parser = argparse.ArgumentParser()

# GitHub credentials (long form first so argparse stores the value as `args.pat`).
parser.add_argument(
    "--pat",
    "-p",
    help="Set github personal access token",
)

# Slack credentials (stored as `args.sat`).
parser.add_argument(
    "--sat",
    "-s",
    help="Set slack api token. Optional. If not passed, will just print to console",
)
def main():
    """Report workflows that exist in the checkout but have no recent run.

    Resolves the GitHub repository from the local clone's ``remote.origin.url``,
    then, for every workflow whose file exists on disk, looks at the first run
    returned by the API. If that run was last updated more than
    ``DAYS_TO_KEEP_ORPHANED_JOBS`` days ago, a message is printed and, when a
    Slack token is available, posted to ``SLACK_CHANNEL_FOR_NOTIFICATIONS``.

    Tokens are taken from ``--pat`` / ``--sat`` first and from the
    ``GITHUB_TOKEN`` / ``SLACK_TOKEN`` environment variables otherwise.

    Raises:
        Exception: if no GitHub token is available, if the git remote is not a
            recognisable GitHub URL, or if the Slack API call fails.
        subprocess.CalledProcessError: if ``git config`` exits non-zero.
    """
    # Local import: the file header only imports `datetime` and `timedelta`.
    from datetime import timezone

    # Read arguments from the command line
    args = parser.parse_args()

    # Prefer user supplied tokens. If not supplied, assume we are running in
    # actions and pull from the environment.
    gh_token = args.pat or os.getenv("GITHUB_TOKEN")
    if not gh_token:
        raise Exception("Github personal access token not provided via args and not available in GITHUB_TOKEN variable")
    slack_token = args.sat or os.getenv("SLACK_TOKEN")

    git_url = subprocess.run(["git", "config", "--get", "remote.origin.url"], check=True, capture_output=True)
    remote_url = git_url.stdout.decode("utf-8").strip()

    # will match both forms (git and https url) of github e.g.
    # git@github.com:airbytehq/airbyte.git
    # https://github.com/airbytehq/airbyte.git
    git_url_regex = re.compile(r"(?:git@|https://)github\.com[:/](.*?)(\.git|$)")
    re_match = git_url_regex.match(remote_url)
    if re_match is None:
        # Fail with a readable message instead of an AttributeError on None.
        raise Exception(f"Could not determine the GitHub repository from remote.origin.url: {remote_url!r}")
    repo_name = re_match.group(1)

    repo = Github(gh_token).get_repo(repo_name)

    # One Slack client for the whole run rather than one per notification.
    slack_client = WebClient(slack_token) if slack_token else None

    # Use an aware UTC cutoff: a naive local `datetime.now()` is skewed against
    # UTC timestamps and cannot be compared with aware datetimes at all.
    cutoff = datetime.now(timezone.utc) - timedelta(days=DAYS_TO_KEEP_ORPHANED_JOBS)

    for workflow in repo.get_workflows():
        # Only workflows whose file still exists are of interest. The path is
        # resolved against the current working directory, so this presumably
        # expects to be run from the repository root.
        if not os.path.exists(workflow.path):
            continue

        # Only the first listed run is inspected.
        # NOTE(review): this relies on the API listing runs newest-first - confirm.
        latest_run = next(iter(workflow.get_runs()), None)
        if latest_run is None:
            # A workflow without any runs is not reported (unchanged behaviour).
            continue

        updated_at = latest_run.updated_at
        if updated_at.tzinfo is None:
            # Naive timestamps are treated as UTC - presumably what older
            # PyGithub releases return; verify against the installed version.
            updated_at = updated_at.replace(tzinfo=timezone.utc)
        if updated_at >= cutoff:
            continue

        # check and see if a workflow exists but is not actively being triggered/run
        message = (
            f"The Github Workflow '{workflow.name}' exists in {repo_name} "
            f"but has no run newer than {DAYS_TO_KEEP_ORPHANED_JOBS} days old. URL: {workflow.html_url}"
        )
        print(message)

        if slack_client is None:
            continue

        print("Sending Slack notification...")
        try:
            slack_client.chat_postMessage(channel=SLACK_CHANNEL_FOR_NOTIFICATIONS, text=message)
        except SlackApiError as e:
            print(e, "\n\n")
            raise Exception("Error calling the Slack API") from e
# Run main() only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()