# android/.github/scripts/release-notes/process_release_notes.py

import re
import sys
import subprocess
import json
from typing import List, Tuple

def extract_jira_tickets(line: str) -> List[str]:
    """Return every Jira-style key (uppercase letters, hyphen, digits) in *line*.

    Keys are reported in order of appearance, duplicates included, and may be
    embedded in surrounding text such as brackets or a trailing colon.
    """
    ticket_pattern = r'[A-Z]+-\d+'
    return [found.group(0) for found in re.finditer(ticket_pattern, line)]

def extract_pr_numbers(line: str) -> List[str]:
    """Return the digits of every GitHub-style ``#123`` reference in *line*.

    Only the number is returned (without the leading ``#``), as a string, in
    order of appearance.
    """
    pr_reference = r'#(\d+)'
    return [found.group(1) for found in re.finditer(pr_reference, line)]

def extract_pr_url(line: str) -> List[str]:
    """Return every ``https://github.com/<owner>/<repo>/pull/<number>`` URL in *line*.

    Each match stops after the pull-request number, so trailing path segments
    (for example ``/files``) are not part of the returned URL.
    """
    pull_url = r'https://github\.com/[\w-]+/[\w.-]+/pull/\d+'
    return [found.group(0) for found in re.finditer(pull_url, line)]

def fetch_labels(github_pr_url: str) -> List[str]:
    """Return the label names of a GitHub pull request via the ``gh`` CLI.

    Runs ``gh pr view <url> --json labels --jq '.labels[].name'`` and returns
    one entry per non-blank output line, with surrounding whitespace removed.

    Raises:
        subprocess.CalledProcessError: if ``gh`` exits with a non-zero status
            (the command is run with ``check=True``).
    """
    command = ['gh', 'pr', 'view', github_pr_url, '--json', 'labels', '--jq', '.labels[].name']
    completed = subprocess.run(command, capture_output=True, text=True, check=True)

    labels: List[str] = []
    for raw_name in completed.stdout.strip().split('\n'):
        name = raw_name.strip()
        # Empty output still yields one empty chunk from split(); skip blanks.
        if name:
            labels.append(name)
    return labels

def process_line(line: str) -> str:
    """Clean one release-note line for publication.

    Removes Jira ticket references (``[ABC-123]``, ``ABC-123: ``,
    ``ABC-123 - ``), the cherry-pick emoji, the ``BACKPORT`` keyword, the
    ``[deps]:`` tag and the ``feat``/``bug``/``ci`` prefixes (with an optional
    parenthesised scope), then collapses whitespace runs to a single space and
    trims the result. When the cleaned text differs from the stripped input,
    a ``Processed: <before> -> <after>`` line is printed to stdout.

    Args:
        line: A single line from release notes.

    Returns:
        The line with tickets and prefixes removed.

    Example:
        >>> process_line("[ABC-123] feat(ui): Add new button")
        Processed: [ABC-123] feat(ui): Add new button -> Add new button
        'Add new button'
    """
    original = line

    # Remove Jira ticket patterns:
    line = re.sub(r'\[[A-Z]+-\d+\]', '', line)   # [ABC-123] -> ""
    line = re.sub(r'[A-Z]+-\d+:\s', '', line)    # ABC-123: -> ""
    line = re.sub(r'[A-Z]+-\d+\s-\s', '', line)  # ABC-123 - -> ""

    # Remove keywords and their variations
    patterns = (
        r'🍒',                      # 🍒 -> ""
        r'BACKPORT',                # BACKPORT -> ""
        # The brackets must be escaped: an unescaped [deps] is a character
        # class that matches any single one of d/e/p/s before the colon, so it
        # would never match the literal tag and would mangle e.g. "Updates:".
        r'\[deps\]:',               # [deps]: -> ""
        r'feat(?:\([^)]*\))?:',     # feat: or feat(ui): -> ""
        r'bug(?:\([^)]*\))?:',      # bug: or bug(core): -> ""
        r'ci(?:\([^)]*\))?:',       # ci: or ci(workflow): -> ""
    )
    for pattern in patterns:
        line = re.sub(pattern, '', line)

    # Replace multiple consecutive whitespace characters with a single space
    line = re.sub(r'\s+', ' ', line)

    cleaned = line.strip()
    original_stripped = original.strip()
    if cleaned != original_stripped:
        print(f"Processed: {original_stripped} -> {cleaned}")
    return cleaned

def process_file(input_file: str, app_label: str = '') -> Tuple[List[str], List[str], List[str]]:
    """Read a release-notes file, collect its references and clean each line.

    Every line is stripped of surrounding whitespace. Non-empty lines that do
    not start with ``#`` are scanned for Jira tickets and ``#123`` PR numbers
    and then cleaned with ``process_line``; blank lines and lines starting
    with ``#`` are kept as-is (stripped). Progress and the collected
    references are printed to stdout.

    Args:
        input_file: Path of the release-notes text file, read as UTF-8.
        app_label: Currently unused by this function. It defaults to an empty
            string so callers that pass only ``input_file`` keep working.

    Returns:
        A tuple ``(jira_tickets, pr_numbers, processed_lines)`` where the
        first two lists are de-duplicated in order of first appearance and the
        third holds one entry per input line.
    """
    jira_tickets: List[str] = []
    pr_numbers: List[str] = []
    processed_lines: List[str] = []

    print("Processing file: ", input_file)

    # Explicit UTF-8: the notes may contain non-ASCII text (e.g. the 🍒
    # marker), and the locale's default encoding is not guaranteed to decode it.
    with open(input_file, 'r', encoding='utf-8') as f:
        for raw_line in f:
            line = raw_line.strip()

            # Blank lines and '#'-prefixed lines pass through untouched.
            if not line or line.startswith('#'):
                processed_lines.append(line)
                continue

            jira_tickets.extend(extract_jira_tickets(line))
            pr_numbers.extend(extract_pr_numbers(line))
            processed_lines.append(process_line(line))

    # Remove duplicates while preserving order
    jira_tickets = list(dict.fromkeys(jira_tickets))
    pr_numbers = list(dict.fromkeys(pr_numbers))

    print("Jira tickets:", ",".join(jira_tickets))
    print("PR numbers:", ",".join(pr_numbers))
    print("Finished processing file: ", input_file)
    return jira_tickets, pr_numbers, processed_lines

def save_results(jira_tickets: List[str], pr_numbers: List[str], processed_lines: List[str],
                jira_file: str = 'jira_tickets.txt',
                pr_file: str = 'pr_numbers.txt',
                processed_file: str = 'processed_notes.txt') -> None:
    """Write the three result lists to text files, one entry per line.

    Each file is overwritten. Entries are joined with ``\\n`` and no trailing
    newline is added, so an empty list produces an empty file.

    Args:
        jira_tickets: Jira ticket keys to write to ``jira_file``.
        pr_numbers: Pull-request numbers to write to ``pr_file``.
        processed_lines: Cleaned release-note lines to write to ``processed_file``.
        jira_file: Output path for the Jira tickets.
        pr_file: Output path for the PR numbers.
        processed_file: Output path for the processed notes.
    """
    outputs = (
        (jira_file, jira_tickets),
        (pr_file, pr_numbers),
        (processed_file, processed_lines),
    )
    for path, entries in outputs:
        # Explicit UTF-8: processed notes may contain non-ASCII text, which the
        # locale's default encoding is not guaranteed to be able to encode.
        with open(path, 'w', encoding='utf-8') as f:
            f.write('\n'.join(entries))

if __name__ == '__main__':
    # Usage: <script> [input_file [jira_file [pr_file [processed_file]]]]
    # Positional arguments override the defaults left to right; anything past
    # the fourth argument is ignored.
    defaults = ['release_notes.txt', 'jira_tickets.txt', 'pr_numbers.txt', 'processed_notes.txt']
    supplied = sys.argv[1:5]
    input_file, jira_file, pr_file, processed_file = supplied + defaults[len(supplied):]

    # NOTE(review): process_file is invoked with the input path only — confirm
    # its signature accepts a single argument.
    jira_tickets, pr_numbers, processed_lines = process_file(input_file)
    save_results(jira_tickets, pr_numbers, processed_lines, jira_file, pr_file, processed_file)