From 4623a4f07955e190387953c96e9baa4af24065df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lison=20Fernandes?= Date: Mon, 17 Nov 2025 20:19:12 +0000 Subject: [PATCH] [PM-14880] ci: Add automated PR labelling based on file paths and title patterns (#6157) --- .github/label-pr.json | 49 ++++++ .github/scripts/label-pr.py | 238 ++++++++++++++++++++++++++++ .github/workflows/sdlc-label-pr.yml | 80 ++++++++++ .husky/pre-commit | 1 - 4 files changed, 367 insertions(+), 1 deletion(-) create mode 100644 .github/label-pr.json create mode 100644 .github/scripts/label-pr.py create mode 100644 .github/workflows/sdlc-label-pr.yml delete mode 100755 .husky/pre-commit diff --git a/.github/label-pr.json b/.github/label-pr.json new file mode 100644 index 0000000000..d99d0d1b0d --- /dev/null +++ b/.github/label-pr.json @@ -0,0 +1,49 @@ +{ + "catch_all_label": "t:misc", + "title_patterns": { + "t:new-feature": ["feat", "feature"], + "t:enhancement": ["enhancement", "enh", "impr"], + "t:bug": ["fix", "bug", "bugfix"], + "t:tech-debt": ["refactor", "chore", "cleanup", "revert", "debt", "test", "perf"], + "t:docs": ["docs"], + "t:ci": ["ci", "build", "chore(ci)"], + "t:deps": ["deps"], + "t:breaking-change": ["breaking", "breaking-change"], + "t:misc": ["misc"] + }, + "path_patterns": { + "app:shared": [ + "annotation/", + "core/", + "data/", + "network/", + "ui/", + "authenticatorbridge/", + "gradle/" + ], + "app:password-manager": [ + "app/", + "cxf/" + ], + "app:authenticator": [ + "authenticator/" + ], + "t:ci": [ + ".github/", + "scripts/", + "fastlane/", + ".gradle/", + ".claude/", + "detekt-config.yml" + ], + "t:docs": [ + "docs/" + ], + "t:deps": [ + "gradle/" + ], + "t:misc": [ + "keystore/" + ] + } +} diff --git a/.github/scripts/label-pr.py b/.github/scripts/label-pr.py new file mode 100644 index 0000000000..2d765cf416 --- /dev/null +++ b/.github/scripts/label-pr.py @@ -0,0 +1,238 @@ +#!/usr/bin/env python3 +# Requires Python 3.9+ +""" +Label pull requests based on changed 
"""
Label pull requests based on changed file paths and PR title patterns
(conventional commit format).

Usage:
    python label-pr.py PR_NUMBER [-a|--add|-r|--replace] [-d|--dry-run] [-c|--config CONFIG]

Arguments:
    PR_NUMBER:      The pull request number
    -a, --add:      Add labels without removing existing ones (default)
    -r, --replace:  Replace all existing labels
    -d, --dry-run:  Run without actually applying labels
    -c, --config:   Path to JSON config file (default: .github/label-pr.json)

Examples:
    python label-pr.py 1234
    python label-pr.py 1234 -a
    python label-pr.py 1234 --replace
    python label-pr.py 1234 -r -d
    python label-pr.py 1234 --config custom-config.json
"""

import argparse
import json
import os
import re
import subprocess
import sys

DEFAULT_MODE = "add"
DEFAULT_CONFIG_PATH = ".github/label-pr.json"

# Top-level config keys that must be present and non-empty.
_REQUIRED_CONFIG_KEYS = ("catch_all_label", "title_patterns", "path_patterns")

# A change under a "shared" path affects both apps, so the shared label is
# replaced by the two app labels instead of being applied itself.
_SHARED_LABEL = "app:shared"
_SHARED_EXPANDS_TO = ("app:password-manager", "app:authenticator")


def load_config_json(config_file: str) -> dict:
    """Load and validate the labelling configuration.

    Args:
        config_file: Path to the JSON config file.

    Returns:
        The parsed config dict; 'catch_all_label', 'title_patterns' and
        'path_patterns' are guaranteed to be present and non-empty.

    Exits the process with status 1 when the file is missing, unreadable,
    not valid JSON, or lacks a required key.
    """
    if not os.path.exists(config_file):
        print(f"❌ Config file not found: {config_file}")
        sys.exit(1)

    try:
        # Explicit encoding: do not depend on the runner's locale.
        with open(config_file, "r", encoding="utf-8") as f:
            config = json.load(f)
    except json.JSONDecodeError as e:
        print(f"❌ JSON deserialization error in label-pr.json config: {e}")
        sys.exit(1)
    except (OSError, ValueError) as e:
        print(f"❌ Unexpected error loading label-pr.json config: {e}")
        sys.exit(1)

    print(f"✅ Loaded config from: {config_file}")

    valid_config = isinstance(config, dict)
    if valid_config:
        # Report every missing key before exiting, not just the first one.
        for key in _REQUIRED_CONFIG_KEYS:
            if not config.get(key):
                print(f"❌ Missing '{key}' in config file")
                valid_config = False

    if not valid_config:
        print("::error::Invalid label-pr.json config file, exiting...")
        sys.exit(1)

    return config


def gh_get_changed_files(pr_number: str) -> list[str]:
    """Return the paths changed in a pull request.

    Returns an empty list (after printing an ::error:: annotation) when the
    `gh` call fails.
    """
    try:
        result = subprocess.run(
            ["gh", "pr", "diff", pr_number, "--name-only"],
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError as e:
        print(f"::error::Error getting changed files: {e}")
        return []
    return [line for line in result.stdout.strip().split("\n") if line]


def gh_get_pr_title(pr_number: str) -> str:
    """Return the title of a pull request.

    Returns an empty string (after printing an ::error:: annotation) when the
    `gh` call fails.
    """
    try:
        result = subprocess.run(
            ["gh", "pr", "view", pr_number, "--json", "title", "--jq", ".title"],
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError as e:
        print(f"::error::Error getting PR title: {e}")
        return ""
    return result.stdout.strip()


def gh_add_labels(pr_number: str, labels: list[str]) -> None:
    """Add labels to a pull request (doesn't remove existing labels).

    Raises:
        subprocess.CalledProcessError: if `gh pr edit` exits non-zero.
    """
    gh_labels = ",".join(labels)
    subprocess.run(
        ["gh", "pr", "edit", pr_number, "--add-label", gh_labels],
        check=True,
    )


def gh_replace_labels(pr_number: str, labels: list[str]) -> None:
    """Replace all labels on a pull request with the specified labels.

    The JSON payload is sent on stdin to `gh api` as a PATCH request.

    Raises:
        subprocess.CalledProcessError: if `gh api` exits non-zero.
    """
    payload = json.dumps({"labels": labels})
    subprocess.run(
        [
            "gh", "api", "repos/{owner}/{repo}/issues/" + pr_number,
            "-X", "PATCH", "--silent", "--input", "-",
        ],
        input=payload,
        text=True,
        check=True,
    )


def label_filepaths(changed_files: list[str], path_patterns: dict) -> list[str]:
    """Map changed file paths to labels using path-prefix patterns.

    Args:
        changed_files: Paths changed in the PR.
        path_patterns: Mapping of label -> list of path prefixes.

    Returns:
        Sorted list of labels whose prefixes match at least one changed file.
        'app:shared' is never returned; it expands to both app labels.
    """
    if not changed_files:
        return []

    labels_to_apply = set()  # set: several files may map to the same label

    for label, patterns in path_patterns.items():
        prefixes = tuple(patterns)  # str.startswith accepts a tuple of options
        matched = next((f for f in changed_files if f.startswith(prefixes)), None)
        if matched is not None:
            print(f"👀 File '{matched}' matches pattern for label '{label}'")
            labels_to_apply.add(label)

    if _SHARED_LABEL in labels_to_apply:
        labels_to_apply.discard(_SHARED_LABEL)
        labels_to_apply.update(_SHARED_EXPANDS_TO)

    if not labels_to_apply:
        print("::warning::No matching file paths found, no labels applied.")

    # Sorted so the result (and the log output) is deterministic.
    return sorted(labels_to_apply)


def _title_has_type(pr_title: str, pattern: str) -> bool:
    """Return True if `pattern` occurs in the title as a conventional-commit type.

    The pattern must be directly followed by ':' or '(' and must not be the
    tail of a longer word, so 'fix' matches 'fix: x' and '[PM-1] fix(ui): x'
    but not 'prefix: x'. Matching is case-insensitive.
    """
    regex = rf"(?<!\w){re.escape(pattern)}(?=[:(])"
    return re.search(regex, pr_title, re.IGNORECASE) is not None


def label_title(pr_title: str, title_patterns: dict) -> list[str]:
    """Map a PR title to labels using conventional-commit type patterns.

    Args:
        pr_title: The pull request title.
        title_patterns: Mapping of label -> list of commit types/keywords.

    Returns:
        Sorted list of labels for which at least one pattern appears in the
        title as a whole word followed by ':' or '('.
    """
    if not pr_title:
        return []

    labels_to_apply = set()
    for label, patterns in title_patterns.items():
        for pattern in patterns:
            if _title_has_type(pr_title, pattern):
                print(f"📝 Title matches pattern '{pattern}' for label '{label}'")
                labels_to_apply.add(label)
                break  # one matching pattern per label is enough

    if not labels_to_apply:
        print("::warning::No matching title patterns found, no labels applied.")

    return sorted(labels_to_apply)


def parse_args(argv=None):
    """Parse command line arguments.

    Args:
        argv: Argument list to parse; defaults to sys.argv[1:].

    Returns:
        argparse.Namespace with pr_number, add, replace, dry_run and config.

    Empty-string arguments are dropped before parsing because the workflow
    passes an unset --dry-run flag as "". Every other unknown argument is an
    error, so a mistyped --dry-run can never be silently ignored and lead to
    labels being applied for real.
    """
    parser = argparse.ArgumentParser(
        description="Label pull requests based on changed file paths and PR title patterns."
    )
    parser.add_argument(
        "pr_number",
        help="The pull request number",
    )

    mode_group = parser.add_mutually_exclusive_group()
    mode_group.add_argument(
        "-a", "--add",
        action="store_true",
        help="Add labels without removing existing ones (default)",
    )
    mode_group.add_argument(
        "-r", "--replace",
        action="store_true",
        help="Replace all existing labels",
    )

    parser.add_argument(
        "-d", "--dry-run",
        action="store_true",
        help="Run without actually applying labels",
    )

    parser.add_argument(
        "-c", "--config",
        default=DEFAULT_CONFIG_PATH,
        help=f"Path to JSON config file (default: {DEFAULT_CONFIG_PATH})",
    )

    raw_args = sys.argv[1:] if argv is None else argv
    return parser.parse_args([arg for arg in raw_args if arg != ""])


def main():
    """Compute labels for a PR from its title and changed files, then apply them."""
    args = parse_args()
    config = load_config_json(args.config)
    catch_all_label = config["catch_all_label"]
    title_patterns = config["title_patterns"]
    path_patterns = config["path_patterns"]

    pr_number = args.pr_number
    mode = "replace" if args.replace else DEFAULT_MODE

    if args.dry_run:
        print("🔍 DRY RUN MODE - Labels will not be applied")
    print(f"📌 Labeling mode: {mode}")
    print(f"🔍 Checking PR #{pr_number}...")

    pr_title = gh_get_pr_title(pr_number)
    if not pr_title:
        # An empty title means the gh call failed (bad PR number, auth, ...).
        # Stop here rather than label the PR from no data.
        print("::error::Could not retrieve the PR title, aborting without changing labels.")
        sys.exit(1)
    print(f"📋 PR Title: {pr_title}\n")

    changed_files = gh_get_changed_files(pr_number)
    print("👀 Changed files:\n" + "\n".join(changed_files) + "\n")
    if not changed_files and mode == "replace":
        # Without a file list the path-derived labels are unknown; replacing
        # would strip labels the PR legitimately has.
        print("::warning::No changed files retrieved, falling back to add mode to keep existing labels.")
        mode = "add"

    filepath_labels = label_filepaths(changed_files, path_patterns)
    title_labels = label_title(pr_title, title_patterns)
    all_labels = set(filepath_labels + title_labels)

    # Every PR gets exactly one kind of "t:" (type) label; fall back to the
    # catch-all when neither the title nor the paths produced one. As the
    # catch-all is validated non-empty, all_labels is never empty below.
    if not any(label.startswith("t:") for label in all_labels):
        all_labels.add(catch_all_label)

    labels = sorted(all_labels)
    labels_str = ", ".join(labels)
    if mode == "add":
        print(f"🏷️ Adding labels: {labels_str}")
        if not args.dry_run:
            gh_add_labels(pr_number, labels)
    else:
        print(f"🏷️ Replacing labels with: {labels_str}")
        if not args.dry_run:
            gh_replace_labels(pr_number, labels)

    print("✅ Done")


if __name__ == "__main__":
    main()
# Manually-triggered workflow that labels a pull request from its changed
# file paths and its title, using .github/scripts/label-pr.py.
name: SDLC / Label PR by Files

on:
  workflow_dispatch:
    inputs:
      pr-number:
        description: "Pull Request Number"
        required: true
        type: number
      mode:
        description: "Labeling Mode"
        type: choice
        options:
          - add
          - replace
        default: add
      dry-run:
        description: "Dry Run - Don't apply labels"
        type: boolean
        default: false

jobs:
  label-pr:
    name: Label PR by Changed Files
    runs-on: ubuntu-24.04
    permissions:
      pull-requests: write # required to update labels
      contents: read

    steps:
      - name: Check out repository
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
        with:
          persist-credentials: false

      # Chooses --add or --replace from the PR's origin. The result is only
      # used by the next step when no explicit `mode` input is given.
      - name: Determine label mode for Pull Request
        id: label-mode
        env:
          GH_TOKEN: ${{ github.token }}
          # Same expression as in the labelling step so both steps always
          # refer to the same pull request.
          _PR_NUMBER: ${{ inputs.pr-number || github.event.pull_request.number }}
          _PR_USER: ${{ github.event.pull_request.user.login }}
          _IS_FORK: ${{ github.event.pull_request.head.repo.fork }}
        run: |
          # Support workflow_dispatch testing by retrieving PR data
          if [ -z "$_PR_USER" ]; then
            echo "👀 PR User is empty, retrieving PR data for PR #$_PR_NUMBER..."
            PR_DATA=$(gh pr view "$_PR_NUMBER" --json author,isCrossRepository)
            _PR_USER=$(echo "$PR_DATA" | jq -r '.author.login')
            _IS_FORK=$(echo "$PR_DATA" | jq -r '.isCrossRepository')
          fi

          echo "📋 PR User: $_PR_USER"
          echo "📋 Is Fork: $_IS_FORK"

          # Handle PRs with labels set by other automations by adding instead of replacing
          if [ "$_IS_FORK" = "true" ]; then
            echo "➡️ Fork PR ($_PR_USER). Label mode: --add"
            echo "label_mode=--add" >> "$GITHUB_OUTPUT"
            exit 0
          fi

          if [ "$_PR_USER" = "renovate[bot]" ] || [ "$_PR_USER" = "bw-ghapp[bot]" ]; then
            echo "➡️ Bot PR ($_PR_USER). Label mode: --add"
            echo "label_mode=--add" >> "$GITHUB_OUTPUT"
            exit 0
          fi

          echo "➡️ Normal PR. Label mode: --replace"
          echo "label_mode=--replace" >> "$GITHUB_OUTPUT"

      - name: Label PR based on changed files
        env:
          GH_TOKEN: ${{ github.token }}
          _PR_NUMBER: ${{ inputs.pr-number || github.event.pull_request.number }}
          # An explicit `mode` input wins; otherwise use the mode computed above.
          _LABEL_MODE: ${{ inputs.mode && format('--{0}', inputs.mode) || steps.label-mode.outputs.label_mode }}
          _DRY_RUN: ${{ inputs.dry-run == true && '--dry-run' || '' }}
        run: |
          echo "🔍 Labeling PR #$_PR_NUMBER with mode: $_LABEL_MODE and dry-run: $_DRY_RUN"
          # Build the argument list explicitly so that an unset dry-run flag is
          # omitted rather than passed to the script as an empty-string argument.
          args=("$_PR_NUMBER" "$_LABEL_MODE")
          if [ -n "$_DRY_RUN" ]; then
            args+=("$_DRY_RUN")
          fi
          python3 .github/scripts/label-pr.py "${args[@]}"