[PM-14880] ci: Add automated PR labelling based on file paths and title patterns (#6157)

This commit is contained in:
Álison Fernandes 2025-11-17 20:19:12 +00:00 committed by GitHub
parent 21afa81322
commit 4623a4f079
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 367 additions and 1 deletions

49
.github/label-pr.json vendored Normal file
View File

@ -0,0 +1,49 @@
{
"catch_all_label": "t:misc",
"title_patterns": {
"t:new-feature": ["feat", "feature"],
"t:enhancement": ["enhancement", "enh", "impr"],
"t:bug": ["fix", "bug", "bugfix"],
"t:tech-debt": ["refactor", "chore", "cleanup", "revert", "debt", "test", "perf"],
"t:docs": ["docs"],
"t:ci": ["ci", "build", "chore(ci)"],
"t:deps": ["deps"],
"t:breaking-change": ["breaking", "breaking-change"],
"t:misc": ["misc"]
},
"path_patterns": {
"app:shared": [
"annotation/",
"core/",
"data/",
"network/",
"ui/",
"authenticatorbridge/",
"gradle/"
],
"app:password-manager": [
"app/",
"cxf/"
],
"app:authenticator": [
"authenticator/"
],
"t:ci": [
".github/",
"scripts/",
"fastlane/",
".gradle/",
".claude/",
"detekt-config.yml"
],
"t:docs": [
"docs/"
],
"t:deps": [
"gradle/"
],
"t:misc": [
"keystore/"
]
}
}

238
.github/scripts/label-pr.py vendored Normal file
View File

@ -0,0 +1,238 @@
#!/usr/bin/env python3
# Requires Python 3.9+
"""
Label pull requests based on changed file paths and PR title patterns (conventional commit format).
Usage:
python label-pr.py <pr-number> [-a|--add|-r|--replace] [-d|--dry-run] [-c|--config CONFIG]
Arguments:
pr-number: The pull request number
-a, --add: Add labels without removing existing ones (default)
-r, --replace: Replace all existing labels
-d, --dry-run: Run without actually applying labels
-c, --config: Path to JSON config file (default: .github/label-pr.json)
Examples:
python label-pr.py 1234
python label-pr.py 1234 -a
python label-pr.py 1234 --replace
python label-pr.py 1234 -r -d
python label-pr.py 1234 --config custom-config.json
"""
import argparse
import json
import os
import subprocess
import sys
DEFAULT_MODE = "add"
DEFAULT_CONFIG_PATH = ".github/label-pr.json"
def load_config_json(config_file: str) -> dict:
"""Load configuration from JSON file."""
if not os.path.exists(config_file):
print(f"❌ Config file not found: {config_file}")
sys.exit(1)
try:
with open(config_file, 'r') as f:
config = json.load(f)
print(f"✅ Loaded config from: {config_file}")
valid_config = True
if not config.get("catch_all_label"):
print("❌ Missing 'catch_all_label' in config file")
valid_config = False
if not config.get("title_patterns"):
print("❌ Missing 'title_patterns' in config file")
valid_config = False
if not config.get("path_patterns"):
print("❌ Missing 'path_patterns' in config file")
valid_config = False
if not valid_config:
print("::error::Invalid label-pr.json config file, exiting...")
sys.exit(1)
return config
except json.JSONDecodeError as e:
print(f"❌ JSON deserialization error in label-pr.json config: {e}")
sys.exit(1)
except Exception as e:
print(f"❌ Unexpected error loading label-pr.json config: {e}")
sys.exit(1)
def gh_get_changed_files(pr_number: str) -> list[str]:
"""Get list of changed files in a pull request."""
try:
result = subprocess.run(
["gh", "pr", "diff", pr_number, "--name-only"],
capture_output=True,
text=True,
check=True
)
changed_files = result.stdout.strip().split("\n")
return list(filter(None, changed_files))
except subprocess.CalledProcessError as e:
print(f"::error::Error getting changed files: {e}")
return []
def gh_get_pr_title(pr_number: str) -> str:
"""Get the title of a pull request."""
try:
result = subprocess.run(
["gh", "pr", "view", pr_number, "--json", "title", "--jq", ".title"],
capture_output=True,
text=True,
check=True
)
return result.stdout.strip()
except subprocess.CalledProcessError as e:
print(f"::error::Error getting PR title: {e}")
return ""
def gh_add_labels(pr_number: str, labels: list[str]) -> None:
"""Add labels to a pull request (doesn't remove existing labels)."""
gh_labels = ','.join(labels)
subprocess.run(
["gh", "pr", "edit", pr_number, "--add-label", gh_labels],
check=True
)
def gh_replace_labels(pr_number: str, labels: list[str]) -> None:
"""Replace all labels on a pull request with the specified labels."""
payload = json.dumps({"labels": labels})
subprocess.run(
["gh", "api", "repos/{owner}/{repo}/issues/" + pr_number, "-X", "PATCH", "--silent", "--input", "-"],
input=payload,
text=True,
check=True
)
def label_filepaths(changed_files: list[str], path_patterns: dict) -> list[str]:
"""Check changed files against path patterns and return labels to apply."""
if not changed_files:
return []
labels_to_apply = set() # Use set to avoid duplicates
for label, patterns in path_patterns.items():
for file in changed_files:
if any(file.startswith(pattern) for pattern in patterns):
print(f"👀 File '{file}' matches pattern for label '{label}'")
labels_to_apply.add(label)
break
if "app:shared" in labels_to_apply:
labels_to_apply.add("app:password-manager")
labels_to_apply.add("app:authenticator")
labels_to_apply.remove("app:shared")
if not labels_to_apply:
print("::warning::No matching file paths found, no labels applied.")
return list(labels_to_apply)
def label_title(pr_title: str, title_patterns: dict) -> list[str]:
"""Check PR title against patterns and return labels to apply."""
if not pr_title:
return []
labels_to_apply = set()
title_lower = pr_title.lower()
for label, patterns in title_patterns.items():
for pattern in patterns:
# Check for pattern with : or ( suffix (conventional commits format)
if f"{pattern}:" in title_lower or f"{pattern}(" in title_lower:
print(f"📝 Title matches pattern '{pattern}' for label '{label}'")
labels_to_apply.add(label)
break
if not labels_to_apply:
print("::warning::No matching title patterns found, no labels applied.")
return list(labels_to_apply)
def parse_args():
"""Parse command line arguments."""
parser = argparse.ArgumentParser(
description="Label pull requests based on changed file paths and PR title patterns."
)
parser.add_argument(
"pr_number",
help="The pull request number"
)
mode_group = parser.add_mutually_exclusive_group()
mode_group.add_argument(
"-a", "--add",
action="store_true",
help="Add labels without removing existing ones (default)"
)
mode_group.add_argument(
"-r", "--replace",
action="store_true",
help="Replace all existing labels"
)
parser.add_argument(
"-d", "--dry-run",
action="store_true",
help="Run without actually applying labels"
)
parser.add_argument(
"-c", "--config",
default=DEFAULT_CONFIG_PATH,
help=f"Path to JSON config file (default: {DEFAULT_CONFIG_PATH})"
)
args, unknown = parser.parse_known_args() # required to handle --dry-run passed as an empty string ("") by the workflow
return args
def main():
args = parse_args()
config = load_config_json(args.config)
CATCH_ALL_LABEL = config["catch_all_label"]
LABEL_TITLE_PATTERNS = config["title_patterns"]
LABEL_PATH_PATTERNS = config["path_patterns"]
pr_number = args.pr_number
mode = "replace" if args.replace else "add"
if args.dry_run:
print("🔍 DRY RUN MODE - Labels will not be applied")
print(f"📌 Labeling mode: {mode}")
print(f"🔍 Checking PR #{pr_number}...")
pr_title = gh_get_pr_title(pr_number)
print(f"📋 PR Title: {pr_title}\n")
changed_files = gh_get_changed_files(pr_number)
print("👀 Changed files:\n" + "\n".join(changed_files) + "\n")
filepath_labels = label_filepaths(changed_files, LABEL_PATH_PATTERNS)
title_labels = label_title(pr_title, LABEL_TITLE_PATTERNS)
all_labels = set(filepath_labels + title_labels)
if not any(label.startswith("t:") for label in all_labels):
all_labels.add(CATCH_ALL_LABEL)
if all_labels:
labels_str = ', '.join(sorted(all_labels))
if mode == "add":
print(f"🏷️ Adding labels: {labels_str}")
if not args.dry_run:
gh_add_labels(pr_number, list(all_labels))
else:
print(f"🏷️ Replacing labels with: {labels_str}")
if not args.dry_run:
gh_replace_labels(pr_number, list(all_labels))
else:
print(" No matching patterns found, no labels applied.")
print("✅ Done")
if __name__ == "__main__":
main()

80
.github/workflows/sdlc-label-pr.yml vendored Normal file
View File

@ -0,0 +1,80 @@
name: SDLC / Label PR by Files
on:
workflow_dispatch:
inputs:
pr-number:
description: "Pull Request Number"
required: true
type: number
mode:
description: "Labeling Mode"
type: choice
options:
- add
- replace
default: add
dry-run:
description: "Dry Run - Don't apply labels"
type: boolean
default: false
jobs:
label-pr:
name: Label PR by Changed Files
runs-on: ubuntu-24.04
permissions:
pull-requests: write # required to update labels
contents: read
steps:
- name: Check out repository
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with:
persist-credentials: false
- name: Determine label mode for Pull Request
id: label-mode
env:
GH_TOKEN: ${{ github.token }}
_PR_NUMBER: ${{ inputs.pr-number }}
_PR_USER: ${{ github.event.pull_request.user.login }}
_IS_FORK: ${{ github.event.pull_request.head.repo.fork }}
run: |
# Support workflow_dispatch testing by retrieving PR data
if [ -z "$_PR_USER" ]; then
echo "👀 PR User is empty, retrieving PR data for PR #$_PR_NUMBER..."
PR_DATA=$(gh pr view "$_PR_NUMBER" --json author,isCrossRepository)
_PR_USER=$(echo "$PR_DATA" | jq -r '.author.login')
_IS_FORK=$(echo "$PR_DATA" | jq -r '.isCrossRepository')
fi
echo "📋 PR User: $_PR_USER"
echo "📋 Is Fork: $_IS_FORK"
# Handle PRs with labels set by other automations by adding instead of replacing
if [ "$_IS_FORK" = "true" ]; then
echo "➡️ Fork PR ($_PR_USER). Label mode: --add"
echo "label_mode=--add" >> "$GITHUB_OUTPUT"
exit 0
fi
if [ "$_PR_USER" = "renovate[bot]" ] || [ "$_PR_USER" = "bw-ghapp[bot]" ]; then
echo "➡️ Bot PR ($_PR_USER). Label mode: --add"
echo "label_mode=--add" >> "$GITHUB_OUTPUT"
exit 0
fi
echo "➡️ Normal PR. Label mode: --replace"
echo "label_mode=--replace" >> "$GITHUB_OUTPUT"
- name: Label PR based on changed files
env:
GH_TOKEN: ${{ github.token }}
_PR_NUMBER: ${{ inputs.pr-number || github.event.pull_request.number }}
_LABEL_MODE: ${{ inputs.mode && format('--{0}', inputs.mode) || steps.label-mode.outputs.label_mode }}
_DRY_RUN: ${{ inputs.dry-run == true && '--dry-run' || '' }}
run: |
echo "🔍 Labeling PR #$_PR_NUMBER with mode: $_LABEL_MODE and dry-run: $_DRY_RUN"
python3 .github/scripts/label-pr.py "$_PR_NUMBER" "$_LABEL_MODE" "$_DRY_RUN"

View File

@ -1 +0,0 @@
npx lint-staged