android/.github/scripts/release-notes/process_release_notes.py
2025-11-27 22:48:15 +00:00

217 lines
7.3 KiB
Python

import re
import sys
import subprocess
import json
import argparse
from typing import List, Tuple
def extract_jira_tickets(line: str) -> List[str]:
    """Collect every Jira-style key (uppercase letters, a dash, digits) in a line.

    The pattern is unanchored, so a key is found even when it is embedded in
    other text (brackets, trailing colon, surrounding words).

    Args:
        line: Text to scan
    Returns:
        All matching keys, in order of appearance (duplicates are kept)
    """
    ticket_matches = re.finditer(r'[A-Z]+-\d+', line)
    return [match.group(0) for match in ticket_matches]
def extract_pr_numbers(line: str) -> List[str]:
    """Collect the numeric part of every GitHub-style "#123" reference in a line.

    Args:
        line: Text to scan
    Returns:
        The digit strings (without the leading "#"), in order of appearance
    """
    pr_reference = re.compile(r'#(\d+)')
    # With a single capture group, findall yields only the captured digits.
    return pr_reference.findall(line)
def extract_pr_url(line: str) -> str:
    """Locate a GitHub pull request URL such as https://github.com/foo/bar/pull/123.

    Args:
        line: Text to scan
    Returns:
        The first PR URL that appears in the line, or an empty string when
        the line contains none
    """
    first_hit = re.search(r'https://github\.com/[\w-]+/[\w.-]+/pull/\d+', line)
    if first_hit is None:
        return ""
    return first_hit.group(0)
def fetch_labels(github_pr_url: str) -> List[str]:
    """Return the label names of a GitHub PR, obtained through the GitHub CLI.

    Runs ``gh pr view`` with a jq filter that prints one label name per line
    and parses the captured standard output.

    Args:
        github_pr_url: Value handed to ``gh pr view`` to identify the PR
    Returns:
        Whitespace-stripped, non-empty label names in the order gh printed them
    Raises:
        subprocess.CalledProcessError: if gh exits with a non-zero status
            (the command is run with check=True)
    """
    command = [
        'gh', 'pr', 'view', github_pr_url,
        '--json', 'labels',
        '--jq', '.labels[].name',
    ]
    completed = subprocess.run(command, capture_output=True, text=True, check=True)

    label_names: List[str] = []
    for raw_name in completed.stdout.strip().split('\n'):
        name = raw_name.strip()
        # An empty stdout still yields one empty piece after split; drop it.
        if name:
            label_names.append(name)
    return label_names
def should_skip_pr(release_app_label: str, pr_labels: List[str]) -> bool:
    """Decide whether a PR belongs to a different app and must be left out.

    A PR is skipped when it carries at least one label starting with "app:"
    and none of those "app:" labels equals release_app_label. PRs without
    any "app:" label are never skipped.

    Args:
        release_app_label: The app label to look for (e.g., "app:password-manager")
        pr_labels: List of labels from the PR
    Returns:
        True if the PR should be skipped, False otherwise
    """
    saw_app_label = False
    for label in pr_labels:
        if not label.startswith('app:'):
            continue
        if label == release_app_label:
            # The PR is explicitly tagged for the app being released.
            return False
        saw_app_label = True
    # Only other apps' labels (or no app labels at all) were seen.
    return saw_app_label
def process_line(line: str) -> str:
    """Process a single line from release notes by removing Jira tickets, conventional commit prefixes and other common patterns.

    Whenever the cleaned text differs from the (stripped) input, a
    "Processed: <before> -> <after>" message is printed.

    Args:
        line: A single line from release notes
    Returns:
        Processed line with tickets and prefixes removed, whitespace runs
        collapsed to a single space, and leading/trailing whitespace stripped
    Example:
        >>> process_line("[ABC-123] feat(ui): Add new button")
        "Add new button"
    """
    original = line

    # Remove Jira ticket patterns:
    line = re.sub(r'\[[A-Z]+-\d+\]', '', line)   # [ABC-123] -> ""
    line = re.sub(r'[A-Z]+-\d+:\s', '', line)    # ABC-123: -> ""
    line = re.sub(r'[A-Z]+-\d+\s-\s', '', line)  # ABC-123 - -> ""

    # Remove keywords and their variations
    patterns = [
        r'🍒',                        # 🍒 -> ""
        r'BACKPORT',                  # BACKPORT -> ""
        # The brackets must be escaped: an unescaped [deps] is a character
        # class that would strip any "d:", "e:", "p:" or "s:" from the text.
        r'\[deps\]:',                 # [deps]: -> ""
        # \b keeps the prefixes from matching inside longer words
        # (e.g. the "bug:" in "debug:").
        r'\bfeat(?:\([^)]*\))?:',     # feat: or feat(ui): -> ""
        r'\bbug(?:\([^)]*\))?:',      # bug: or bug(core): -> ""
        r'\bci(?:\([^)]*\))?:',       # ci: or ci(workflow): -> ""
    ]
    for pattern in patterns:
        line = re.sub(pattern, '', line)

    # Replace multiple consecutive spaces with a single space
    line = re.sub(r'\s+', ' ', line)
    cleaned = line.strip()

    original_stripped = original.strip()
    if cleaned != original_stripped:
        print(f"Processed: {original_stripped} -> {cleaned}")
    return cleaned
def process_file(input_file: str, release_app_label: str) -> Tuple[List[str], List[str], List[str], List[str]]:
    """Read a release notes file and split it into tickets, PR numbers and cleaned notes.

    Only lines that start with "* " (after stripping) are treated as release
    note entries. For each entry that contains a PR URL, the PR's labels are
    fetched and the entry is dropped when should_skip_pr says it belongs to
    another app. Every other line is copied to the processed output as-is
    (stripped).

    Args:
        input_file: Path of the release notes file to read
        release_app_label: The app label used to filter PRs (e.g., "app:password-manager")
    Returns:
        A 4-tuple of lists:
        - Jira tickets found in kept entries, de-duplicated, first-seen order
        - PR numbers found in kept entries, de-duplicated, first-seen order
        - processed note lines (cleaned entries plus passthrough lines)
        - debug lines describing what happened to each input line
    """
    jira_tickets: List[str] = []
    pr_numbers: List[str] = []
    processed_lines: List[str] = []
    debug_lines: List[str] = []

    print("Processing file: ", input_file)
    with open(input_file, 'r') as f:
        for line in f:
            line = line.strip()

            if not line.startswith('* '):
                # Not a release note entry: pass it through untouched.
                processed_lines.append(line)
                if line == "":
                    debug_lines.append("")
                else:
                    debug_lines.append(f"{line} | skipped - processing")
                continue

            pr_url = extract_pr_url(line)
            pr_labels: List[str] = []

            # Fetch labels from PR URL if available
            if pr_url:
                pr_labels = fetch_labels(pr_url)

            if should_skip_pr(release_app_label, pr_labels):
                # Skipped entries contribute no tickets, PR numbers or notes.
                debug_lines.append(f"{line} | skipped - labels: {pr_labels}")
                continue

            jira_tickets.extend(extract_jira_tickets(line))
            pr_numbers.extend(extract_pr_numbers(line))
            processed_lines.append(process_line(line))
            debug_lines.append(f"{line} | labels: {pr_labels}")

    # Remove duplicates while preserving order
    jira_tickets = list(dict.fromkeys(jira_tickets))
    pr_numbers = list(dict.fromkeys(pr_numbers))

    print("Jira tickets:", ",".join(jira_tickets))
    print("PR numbers:", ",".join(pr_numbers))
    print("Finished processing file: ", input_file)
    return jira_tickets, pr_numbers, processed_lines, debug_lines
def save_results(jira_tickets: List[str], pr_numbers: List[str], processed_lines: List[str], debug_lines: List[str],
                 jira_file: str = 'jira_tickets.txt',
                 pr_file: str = 'pr_numbers.txt',
                 processed_file: str = 'processed_notes.txt',
                 debug_file: str = 'processed_notes_debug.txt'
                 ) -> None:
    """Write each result list to its own text file, one item per line.

    Items are joined with "\\n"; no trailing newline is written. Existing
    files are overwritten.

    Args:
        jira_tickets: Jira ticket keys to write to jira_file
        pr_numbers: PR numbers to write to pr_file
        processed_lines: Cleaned release note lines to write to processed_file
        debug_lines: Debug lines to write to debug_file
        jira_file: Output path for the Jira tickets
        pr_file: Output path for the PR numbers
        processed_file: Output path for the processed notes
        debug_file: Output path for the debug notes
    """
    # Same order as the original sequential writes.
    outputs = (
        (jira_file, jira_tickets),
        (pr_file, pr_numbers),
        (processed_file, processed_lines),
        (debug_file, debug_lines),
    )
    for path, entries in outputs:
        with open(path, 'w') as handle:
            handle.write('\n'.join(entries))
def parse_args():
    """Parse command line arguments.

    Positional arguments:
        release_app_label: required app label used to filter PRs
        input_file: optional release notes path (default: release_notes.txt)
    Options:
        --processed-filepath, --jira-filepath, --pr-filepath, --debug-filepath:
        output paths, each with its own default

    Returns:
        Parsed arguments namespace
    """
    parser = argparse.ArgumentParser(
        description='Process release notes by extracting Jira tickets and PR numbers, and cleaning up the text.'
    )
    parser.add_argument(
        'release_app_label',
        help='Filter PRs by app label (e.g., app:password-manager)'
    )
    parser.add_argument(
        'input_file',
        # nargs='?' makes the positional optional; without it argparse
        # ignores `default` and always demands the argument.
        nargs='?',
        default='release_notes.txt',
        help='Input file containing release notes (default: release_notes.txt)'
    )
    parser.add_argument(
        '--processed-filepath',
        default='processed_notes.txt',
        help='Output file for processed notes (default: processed_notes.txt)'
    )
    parser.add_argument(
        '--jira-filepath',
        default='jira_tickets.txt',
        help='Output file for Jira tickets (default: jira_tickets.txt)'
    )
    parser.add_argument(
        '--pr-filepath',
        default='pr_numbers.txt',
        help='Output file for PR numbers (default: pr_numbers.txt)'
    )
    parser.add_argument(
        '--debug-filepath',
        default='processed_notes_debug.txt',
        help='Output file for debug notes (default: processed_notes_debug.txt)'
    )
    return parser.parse_args()
if __name__ == '__main__':
    # Script entry point: parse the CLI, process the notes, write the outputs.
    cli = parse_args()
    tickets, prs, notes, debug_notes = process_file(cli.input_file, cli.release_app_label)
    save_results(
        tickets,
        prs,
        notes,
        debug_notes,
        jira_file=cli.jira_filepath,
        pr_file=cli.pr_filepath,
        processed_file=cli.processed_filepath,
        debug_file=cli.debug_filepath,
    )