Onboard the dotnet/issue-labeler for 'area-' label prediction on issues (#9889)

* [Issue-Labeler] Configure labeler-train.yml Configure the issue-labeler training workflow. - Imported the workflow template - Removed inputs, jobs, and conditions related to pull requests, as we will only use the labeler for issues in this repository * [Issue-Labeler] Configure labeler-promote.yml Configure the issue-labeler promotion workflow. - Imported the workflow template - Removed input and job for pull requests, as we will only use the labeler for issues in this repository * [Issue-Labeler] Configure labeler-predict-issues.yml Configure the issue-labeler issue prediction workflow. - Imported the workflow template - Removed the excluded_authors input * [Issue-Labeler] Configure labeler-cache-retention.yml Configure the issue-labeler cache retention workflow. - Imported the workflow template - Removed config related to pull requests, as we will only use the labeler for issues in this repository * [Issue-Labeler] Add labeler onboarding/configuration doc * Bump superlinter to v7 (via SHA) to get type:number support in GitHub workflows * Address CHECKOV lint errors in GitHub workflows * Ignore GitHub workflow files from prettier * Explicitly set workflow permissions * Comment that UTC is used for cron schedule
2025-12-19 09:50:46 -05:00 · 2025-06-23 15:00:12 -07:00
parent 2b337e1b84
commit 5d3e41fafb
10 changed files with 280 additions and 4 deletions
--- a/.checkov.yml
+++ b/.checkov.yml
@@ -0,0 +1,10 @@
+---
+# Available properties:
+# https://github.com/bridgecrewio/checkov#configuration-using-a-config-file
+
+skip-check:
+  # CKV_GHA_7: "The build output cannot be affected by user parameters other
+  # than the build entry point and the top-level source location. GitHub
+  # Actions workflow_dispatch inputs MUST be empty."
+  # Suppressed as issue-labeler workflows require `workflow_dispatch` inputs.
+  - CKV_GHA_7
--- a/.github/workflows/labeler-cache-retention.yml
+++ b/.github/workflows/labeler-cache-retention.yml
@@ -0,0 +1,36 @@
+# Workflow template imported from:
+# https://github.com/dotnet/.github/workflow-templates
+#
+# Regularly restore the prediction model from cache to prevent cache eviction
+name: "Labeler: Cache Retention"
+
+# For more information about GitHub's action cache limits and eviction policy, see:
+# https://docs.github.com/actions/writing-workflows/choosing-what-your-workflow-does/caching-dependencies-to-speed-up-workflows#usage-limits-and-eviction-policy
+
+on:
+  schedule:
+    - cron: "24 12 * * *" # 12:24 UTC every day (arbitrary time daily)
+
+  workflow_dispatch:
+    inputs:
+      cache_key:
+        description: "The cache key suffix to use for restoring the model from cache. Defaults to 'ACTIVE'."
+        required: true
+        default: "ACTIVE"
+
+env:
+  CACHE_KEY: ${{ inputs.cache_key || 'ACTIVE' }}
+
+permissions: {}
+
+jobs:
+  restore-cache:
+    # Do not automatically run the workflow on forks outside the 'dotnet' org
+    if: ${{ github.event_name == 'workflow_dispatch' || github.repository_owner == 'dotnet' }}
+    runs-on: ubuntu-latest
+    steps:
+      - uses: dotnet/issue-labeler/restore@46125e85e6a568dc712f358c39f35317366f5eed # v2.0.0
+        with:
+          type: "issues"
+          cache_key: ${{ env.CACHE_KEY }}
+          fail-on-cache-miss: true
--- a/.github/workflows/labeler-predict-issues.yml
+++ b/.github/workflows/labeler-predict-issues.yml
@@ -0,0 +1,59 @@
+# Workflow template imported from:
+# https://github.com/dotnet/.github/workflow-templates
+#
+# Predict labels for Issues using a trained model
+name: "Labeler: Predict (Issues)"
+
+on:
+  # Only automatically predict area labels when issues are first opened
+  issues:
+    types: opened
+
+  # Allow dispatching the workflow via the Actions UI, specifying ranges of numbers
+  workflow_dispatch:
+    inputs:
+      issues:
+        description: "Issue Numbers (comma-separated list of ranges)."
+        required: true
+      cache_key:
+        description: "The cache key suffix to use for restoring the model. Defaults to 'ACTIVE'."
+        required: true
+        default: "ACTIVE"
+
+env:
+  # Do not allow failure for jobs triggered automatically (as this causes red noise on the workflows list)
+  ALLOW_FAILURE: ${{ github.event_name == 'workflow_dispatch' }}
+
+  LABEL_PREFIX: "area-"
+  THRESHOLD: 0.40
+  DEFAULT_LABEL: "needs-area-label"
+
+permissions:
+  issues: write
+
+jobs:
+  predict-issue-label:
+    # Do not automatically run the workflow on forks outside the 'dotnet' org
+    if: ${{ github.event_name == 'workflow_dispatch' || github.repository_owner == 'dotnet' }}
+    runs-on: ubuntu-latest
+    steps:
+      - name: "Restore issues model from cache"
+        id: restore-model
+        uses: dotnet/issue-labeler/restore@46125e85e6a568dc712f358c39f35317366f5eed # v2.0.0
+        with:
+          type: issues
+          fail-on-cache-miss: ${{ env.ALLOW_FAILURE }}
+          quiet: true
+
+      - name: "Predict issue labels"
+        id: prediction
+        if: ${{ steps.restore-model.outputs.cache-hit == 'true' }}
+        uses: dotnet/issue-labeler/predict@46125e85e6a568dc712f358c39f35317366f5eed # v2.0.0
+        with:
+          issues: ${{ inputs.issues || github.event.issue.number }}
+          label_prefix: ${{ env.LABEL_PREFIX }}
+          threshold: ${{ env.THRESHOLD }}
+          default_label: ${{ env.DEFAULT_LABEL }}
+        env:
+          GITHUB_TOKEN: ${{ github.token }}
+        continue-on-error: ${{ !env.ALLOW_FAILURE }}
--- a/.github/workflows/labeler-promote.yml
+++ b/.github/workflows/labeler-promote.yml
@@ -0,0 +1,36 @@
+# Workflow template imported from:
+# https://github.com/dotnet/.github/workflow-templates
+#
+# Promote a model from staging to 'ACTIVE', backing up the currently 'ACTIVE' model
+name: "Labeler: Promotion"
+
+on:
+  # Dispatched via the Actions UI, promotes the model from a staged slot into the active prediction environment
+  workflow_dispatch:
+    inputs:
+      issues:
+        description: "Issues: Promote Model"
+        type: boolean
+        required: true
+      staged_key:
+        description: "The cache key suffix to use for promoting a staged model to 'ACTIVE'. Defaults to 'staged'."
+        required: true
+        default: "staged"
+      backup_key:
+        description: "The cache key suffix to use for backing up the currently active model. Defaults to 'backup'."
+        default: "backup"
+
+permissions:
+  actions: write
+
+jobs:
+  promote-issues:
+    if: ${{ inputs.issues }}
+    runs-on: ubuntu-latest
+    steps:
+      - name: "Promote Model for Issues"
+        uses: dotnet/issue-labeler/promote@46125e85e6a568dc712f358c39f35317366f5eed # v2.0.0
+        with:
+          type: "issues"
+          staged_key: ${{ inputs.staged_key }}
+          backup_key: ${{ inputs.backup_key }}
--- a/.github/workflows/labeler-train.yml
+++ b/.github/workflows/labeler-train.yml
@@ -0,0 +1,92 @@
+# Workflow template imported from:
+# https://github.com/dotnet/.github/workflow-templates
+#
+# Train the model for issue label prediction
+name: "Labeler: Training"
+
+on:
+  workflow_dispatch:
+    inputs:
+      steps:
+        description: "Training Steps"
+        type: choice
+        required: true
+        default: "All"
+        options:
+          - "All"
+          - "Download Data"
+          - "Train Model"
+          - "Test Model"
+
+      limit:
+        description: "Max number of items to download for training/testing the model (newest items are used). Defaults to the max number of pages times the page size."
+        type: number
+      page_size:
+        description: "Number of items per page in GitHub API requests. Defaults to 100."
+        type: number
+      page_limit:
+        description: "Maximum number of pages to download for training/testing the model. Defaults to 1000."
+        type: number
+      cache_key_suffix:
+        description: "The cache key suffix to use for staged data/models (use 'ACTIVE' to bypass staging). Defaults to 'staged'."
+        required: true
+        default: "staged"
+
+env:
+  CACHE_KEY: ${{ inputs.cache_key_suffix }}
+  REPOSITORY: ${{ github.repository }}
+  LABEL_PREFIX: "area-"
+  THRESHOLD: "0.40"
+  LIMIT: ${{ inputs.limit }}
+  PAGE_SIZE: ${{ inputs.page_size }}
+  PAGE_LIMIT: ${{ inputs.page_limit }}
+
+permissions:
+  issues: read
+
+jobs:
+  download-issues:
+    runs-on: ubuntu-latest
+    steps:
+      - name: "Download Issues"
+        uses: dotnet/issue-labeler/download@46125e85e6a568dc712f358c39f35317366f5eed # v2.0.0
+        with:
+          type: "issues"
+          cache_key: ${{ env.CACHE_KEY }}
+          repository: ${{ env.REPOSITORY }}
+          label_prefix: ${{ env.LABEL_PREFIX }}
+          limit: ${{ env.LIMIT }}
+          page_size: ${{ env.PAGE_SIZE }}
+          page_limit: ${{ env.PAGE_LIMIT }}
+        env:
+          GITHUB_TOKEN: ${{ github.token }}
+
+  train-issues:
+    runs-on: ubuntu-latest
+    permissions: {}
+    needs: download-issues
+    steps:
+      - name: "Train Model for Issues"
+        uses: dotnet/issue-labeler/train@46125e85e6a568dc712f358c39f35317366f5eed # v2.0.0
+        with:
+          type: "issues"
+          data_cache_key: ${{ env.CACHE_KEY }}
+          model_cache_key: ${{ env.CACHE_KEY }}
+
+  test-issues:
+    runs-on: ubuntu-latest
+    needs: train-issues
+    steps:
+      - name: "Test Model for Issues"
+        uses: dotnet/issue-labeler/test@46125e85e6a568dc712f358c39f35317366f5eed # v2.0.0
+        with:
+          type: "issues"
+          cache_key: ${{ env.CACHE_KEY }}
+          repository: ${{ env.REPOSITORY }}
+          label_prefix: ${{ env.LABEL_PREFIX }}
+          threshold: ${{ env.THRESHOLD }}
+          limit: ${{ env.LIMIT }}
+          page_size: ${{ env.PAGE_SIZE }}
+          page_limit: ${{ env.PAGE_LIMIT }}
+        env:
+          GITHUB_TOKEN: ${{ github.token }}
--- a/.github/workflows/labeler.md
+++ b/.github/workflows/labeler.md
@@ -0,0 +1,32 @@
+# Issue-Labeler Workflows
+
+This repository uses actions from [dotnet/issue-labeler](https://github.com/dotnet/issue-labeler) to predict area labels for issues and pull requests.
+
+The following workflow templates were imported and updated from [dotnet/.github/workflow-templates](https://github.com/dotnet/.github/tree/main/workflow-templates) using
+GitHub's UI for adding new workflows. Issue Labeler [Onboarding](https://github.com/dotnet/issue-labeler/wiki/Onboarding) was referenced for the configurations.
+
+1. `labeler-train.yml`
+2. `labeler-promote.yml`
+3. `labeler-predict-issues.yml`
+4. `labeler-cache-retention.yml`
+
+## Repository Configuration
+
+Across these workflows, the following changes were made to configure the issue labeler for this repository:
+
+1. Set `LABEL_PREFIX` to `"area-"`:
+    - `labeler-predict-issues.yml`
+    - `labeler-train.yml`
+2. Set the `DEFAULT_LABEL` value to `"needs-area-label"` to apply a default label when no prediction is made.
+    - `labeler-predict-issues.yml`
+3. Remove the `EXCLUDED_AUTHORS` value as we do not bypass labeling for any authors' issues/pulls in this repository:
+    - `labeler-predict-issues.yml`
+    - `labeler-train.yml`
+4. Remove the `repository` input for training the models against another repository:
+    - `labeler-train.yml`
+5. Update the cache retention cron schedule to an arbitrary time of day:
+    - `labeler-cache-retention.yml`
+6. Remove configuration, inputs, and jobs related to pull requests, as we will only predict labels for issues in this repository:
+    - `labeler-train.yml`
+    - `labeler-promote.yml`
+    - `labeler-cache-retention.yml`
--- a/.github/workflows/markdown-link-check.yml
+++ b/.github/workflows/markdown-link-check.yml
@@ -9,6 +9,9 @@ on:
    paths:
      - '**.md'

+permissions:
+  contents: read
+
 jobs:
  markdown-link-check:
    runs-on: ubuntu-latest
--- a/.github/workflows/no-response.yml
+++ b/.github/workflows/no-response.yml
@@ -12,11 +12,12 @@ on:
    # Schedule for five minutes after the hour, every hour
    - cron: '5 * * * *'

+permissions:
+  issues: write
+
 jobs:
  noResponse:
    runs-on: ubuntu-latest
-    permissions:
-      issues: write
    steps:
      - uses: actions/stale@v9
        with:
--- a/.github/workflows/super-linter.yml
+++ b/.github/workflows/super-linter.yml
@@ -11,18 +11,22 @@ on:
    branches: [main]
  pull_request:
    branches: [main]
+
+permissions:
+  contents: read
+
 jobs:
  run-lint:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
-        uses: actions/checkout@v2
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          # Full git history is needed to get a proper list of changed files within `super-linter`
          fetch-depth: 0

      - name: Lint Code Base
-        uses: github/super-linter@v5
+        uses: github/super-linter@b807e99ddd37e444d189cfd2c2ca1274d8ae8ef1 # v7
        env:
          VALIDATE_ALL_CODEBASE: false
          VALIDATE_JSCPD: false
--- a/.prettierignore
+++ b/.prettierignore
@@ -0,0 +1,3 @@
+# Ignore GitHub Workflow files and docs
+.github/workflows/*.yml
+.github/workflows/*.md