From 820d68d598f24131c366465087c6fdf0a4cf5b86 Mon Sep 17 00:00:00 2001 From: naspirato Date: Tue, 18 Nov 2025 12:40:18 +0100 Subject: [PATCH 01/24] Enhance cherry-pick script with support for unmerged PRs and improved error handling - Added `--allow-unmerged` option to backport unmerged PRs directly using their commits. - Introduced `--merge-commits` option to control behavior for merge commits (skip or fail). - Enhanced error handling for cherry-pick conflicts and Git command failures. - Improved PR title and body formatting to include metadata and linked issues. - Updated workflow configuration to support new script options. --- .github/scripts/cherry_pick.py | 720 +++++++++++++++++++++++++++--- .github/workflows/cherry_pick.yml | 4 +- 2 files changed, 652 insertions(+), 72 deletions(-) diff --git a/.github/scripts/cherry_pick.py b/.github/scripts/cherry_pick.py index a18a1254252f..d5001cbf94de 100755 --- a/.github/scripts/cherry_pick.py +++ b/.github/scripts/cherry_pick.py @@ -1,10 +1,14 @@ #!/usr/bin/env python3 import os +import sys import datetime import logging import subprocess import argparse +import re +from typing import List, Optional, Tuple from github import Github, GithubException, GithubObject, Commit +import requests class CherryPickCreator: @@ -19,33 +23,6 @@ def __split(s: str, seps: str = ', \n'): result += __split(part, seps[1:]) return result - def __add_commit(c: str, single: bool): - commit = self.repo.get_commit(c) - pulls = commit.get_pulls() - if pulls.totalCount > 0: - pr = pulls.get_page(0)[0] - if single: - self.pr_title_list.append(pr.title) - else: - self.pr_title_list.append(f'commit {commit.sha}') - self.pr_body_list.append(f"* commit {commit.html_url}: {pr.title }") - else: - if single: - self.pr_title_list.append(f'cherry-pick commit {commit.sha}') - else: - self.pr_title_list.append(f'commit {commit.sha}') - self.pr_body_list.append(f"* commit {commit.html_url}") - self.commit_shas.append(commit.sha) - - def __add_pull(p: int, single: bool): - pull = self.repo.get_pull(p) - if single: - self.pr_title_list.append(f"{pull.title}") - else: - self.pr_title_list.append(f'PR {pull.number}') - self.pr_body_list.append(f"* PR {pull.html_url}") - self.commit_shas.append(pull.merge_commit_sha) - self.repo_name = os.environ["REPO"] self.target_branches = __split(args.target_branches) self.token = os.environ["TOKEN"] @@ -54,34 +31,377 @@ def __add_pull(p: int, single: bool): self.commit_shas: list[str] = [] self.pr_title_list: list[str] = [] self.pr_body_list: list[str] = [] + self.pull_requests: list = [] # Store PR objects for later use + self.pull_authors: list[str] = [] + self.workflow_triggerer = os.environ.get('GITHUB_ACTOR', 'unknown') + self.has_errors = False + self.merge_commits_mode = getattr(args, 'merge_commits', 'skip') # fail or skip + self.allow_unmerged = getattr(args, 'allow_unmerged', False) # Allow backporting unmerged PRs + self.created_backport_prs = [] # Store created backport PRs: [(target_branch, pr, has_conflicts), ...] + self.skipped_branches = [] # Store branches where PR was not created: [(target_branch, reason), ...] + self.backport_comments = [] # Store comment objects for editing: [(pull, comment), ...] + + self.dtm = datetime.datetime.now().strftime("%y%m%d-%H%M") + self.logger = logging.getLogger("cherry-pick") + # Get workflow run URL + run_id = os.getenv('GITHUB_RUN_ID') + if run_id: + try: + self.workflow_url = self.repo.get_workflow_run(int(run_id)).html_url + except (GithubException, ValueError): + self.workflow_url = None + else: + self.workflow_url = None + commits = __split(args.commits) for c in commits: - id = c.split('/')[-1] + id = c.split('/')[-1].strip() try: - __add_pull(int(id), len(commits) == 1) + self.__add_pull(int(id), len(commits) == 1) except ValueError: - __add_commit(id, len(commits) == 1) + self.__add_commit(id, len(commits) == 1) - self.dtm = datetime.datetime.now().strftime("%y%m%d-%H%M") - self.logger = logging.getLogger("cherry-pick") + def _expand_short_sha(self, short_sha: str) -> str: + """Expand short SHA to full SHA via GitHub API or git rev-parse""" + if len(short_sha) == 40: + return short_sha # already full + + if len(short_sha) < 7: + raise ValueError(f"SHA too short: {short_sha}. Minimum 7 characters required") + + # Try GitHub API first (works before cloning) try: - self.workflow_url = self.repo.get_workflow_run(int(os.getenv('GITHUB_RUN_ID', 0))).html_url - except: - self.workflow_url = None + commits = self.repo.get_commits(sha=short_sha) + if commits.totalCount > 0: + commit = commits[0] + if commit.sha.startswith(short_sha): + return commit.sha + except Exception as e: + self.logger.debug(f"Failed to find commit via GitHub API: {e}") + + # If GitHub API didn't find it, use git rev-parse (after cloning) + result = subprocess.run( + ['git', 'rev-parse', short_sha], + capture_output=True, + text=True, + check=True + ) + return result.stdout.strip() + + def __add_commit(self, c: str, single: bool): + """Add commit by SHA (supports short SHA)""" + # Expand short SHA to full + try: + expanded_sha = self._expand_short_sha(c) + except ValueError as e: + self.logger.error(f"Failed to expand SHA {c}: {e}") + raise + + commit = self.repo.get_commit(expanded_sha) + + # Check if this is a merge commit + is_merge_commit = commit.parents and len(commit.parents) > 1 + + if is_merge_commit: + if self.merge_commits_mode == 'fail': + raise ValueError(f"Commit {expanded_sha[:7]} is a merge commit. Use PR number instead or set --merge-commits skip") + elif self.merge_commits_mode == 'skip': + self.logger.info(f"Skipping merge commit {expanded_sha[:7]} (--merge-commits skip)") + return + + pulls = commit.get_pulls() + if pulls.totalCount > 0: + pr = pulls.get_page(0)[0] + # Check that PR was merged (unless allow_unmerged is set) + if not pr.merged: + if not self.allow_unmerged: + raise ValueError(f"PR #{pr.number} (associated with commit {expanded_sha[:7]}) is not merged. Cannot backport unmerged PR. Use --allow-unmerged to allow") + else: + self.logger.info(f"PR #{pr.number} (associated with commit {expanded_sha[:7]}) is not merged, but --allow-unmerged is set, proceeding") + # Add PR to pull_requests if not already added (for comments and issues) + existing_pr_numbers = [p.number for p in self.pull_requests] + if pr.number not in existing_pr_numbers: + self.pull_requests.append(pr) + self.pull_authors.append(pr.user.login) + if single: + self.pr_title_list.append(pr.title) + else: + self.pr_title_list.append(f'commit {commit.sha[:7]}') + self.pr_body_list.append(f"* commit {commit.html_url}: {pr.title}") + else: + # Try to get commit author if no PR found + try: + commit_author = commit.author.login if commit.author else None + if commit_author and commit_author not in self.pull_authors: + self.pull_authors.append(commit_author) + except: + pass + if single: + self.pr_title_list.append(f'cherry-pick commit {commit.sha[:7]}') + else: + self.pr_title_list.append(f'commit {commit.sha[:7]}') + self.pr_body_list.append(f"* commit {commit.html_url}") + self.commit_shas.append(commit.sha) + + def _get_commits_from_pr(self, pull) -> List[str]: + """Get list of commits from PR (excluding merge commits)""" + commits = [] + for commit in pull.get_commits(): + commit_obj = commit.commit + # Skip merge commits inside PR if mode is skip + if self.merge_commits_mode == 'skip': + # Check if commit is a merge commit + if commit_obj.parents and len(commit_obj.parents) > 1: + self.logger.info(f"Skipping merge commit {commit.sha[:7]} from PR #{pull.number}") + continue + commits.append(commit.sha) + return commits + + def __add_pull(self, p: int, single: bool): + """Add PR and get commits from it""" + pull = self.repo.get_pull(p) + + # Check that PR was merged (unless allow_unmerged is set) + if not pull.merged: + if not self.allow_unmerged: + raise ValueError(f"PR #{p} is not merged. Use --allow-unmerged to backport unmerged PRs") + else: + self.logger.info(f"PR #{p} is not merged, but --allow-unmerged is set, proceeding with commits from PR") + + self.pull_requests.append(pull) + self.pull_authors.append(pull.user.login) + + if single: + self.pr_title_list.append(f"{pull.title}") + else: + self.pr_title_list.append(f'PR {pull.number}') + self.pr_body_list.append(f"* PR {pull.html_url}") + + # Get commits from PR instead of merge_commit_sha + pr_commits = self._get_commits_from_pr(pull) + if not pr_commits: + raise ValueError(f"PR #{p} contains no commits to cherry-pick") + + # For unmerged PRs, always use commits from PR (no merge_commit_sha) + if not pull.merged: + self.commit_shas.extend(pr_commits) + self.logger.info(f"PR #{p} is unmerged, using {len(pr_commits)} commits from PR") + elif pull.merge_commit_sha: + # If PR was merged as merge commit, use individual commits + # Otherwise use merge_commit_sha (for squash/rebase merge) + # Check if merge_commit_sha is a merge commit + merge_commit = self.repo.get_commit(pull.merge_commit_sha) + if merge_commit.parents and len(merge_commit.parents) > 1: + # This is a merge commit, use individual commits from PR + self.commit_shas.extend(pr_commits) + self.logger.info(f"PR #{p} was merged as merge commit, using {len(pr_commits)} individual commits") + else: + # This is not a merge commit (squash/rebase), use merge_commit_sha + self.commit_shas.append(pull.merge_commit_sha) + self.logger.info(f"PR #{p} was merged as squash/rebase, using merge_commit_sha") + else: + # If merge_commit_sha is missing, use commits from PR + self.commit_shas.extend(pr_commits) + + def _validate_prs(self): + """Validate PR numbers""" + # Validate PRs from self.pull_requests (already loaded) + for pull in self.pull_requests: + try: + pull = self.repo.get_pull(pull.number) + if not pull.merged and not self.allow_unmerged: + raise ValueError(f"PR #{pull.number} is not merged. Use --allow-unmerged to allow backporting unmerged PRs") + except GithubException as e: + raise ValueError(f"PR #{pull.number} does not exist or is not accessible: {e}") + + def _validate_commits(self): + """Validate commits (SHA)""" + for commit_sha in self.commit_shas: + try: + commit = self.repo.get_commit(commit_sha) + # Check that commit exists + if not commit.sha: + raise ValueError(f"Commit {commit_sha} not found") + except GithubException as e: + raise ValueError(f"Commit {commit_sha} does not exist: {e}") + + def _validate_branches(self): + """Validate target branches""" + for branch in self.target_branches: + try: + self.repo.get_branch(branch) + except GithubException as e: + raise ValueError(f"Branch {branch} does not exist: {e}") + + def _validate_inputs(self): + """Validate all input data""" + if len(self.commit_shas) == 0: + raise ValueError("No commits to cherry-pick") + if len(self.target_branches) == 0: + raise ValueError("No target branches specified") + + self.logger.info("Validating input data...") + self._validate_prs() + self._validate_commits() + self._validate_branches() + self.logger.info("Input validation successful") + + def _is_commit_in_branch(self, commit_sha: str, branch_name: str) -> bool: + """Check if commit already exists in target branch""" + try: + # First fetch for up-to-date branch information + self.git_run("fetch", "origin", branch_name) + + # Check via merge-base + result = subprocess.run( + ['git', 'merge-base', '--is-ancestor', commit_sha, f'origin/{branch_name}'], + capture_output=True, + text=True + ) + return result.returncode == 0 + except Exception as e: + self.logger.warning(f"Failed to check if commit {commit_sha[:7]} exists in branch {branch_name}: {e}") + return False + + def _get_linked_issues_graphql(self, pr_number: int) -> List[str]: + """Get linked issues via GraphQL API""" + query = """ + query($owner: String!, $repo: String!, $prNumber: Int!) { + repository(owner: $owner, name: $repo) { + pullRequest(number: $prNumber) { + closingIssuesReferences(first: 100) { + nodes { + number + repository { + owner { login } + name + } + } + } + } + } + } + """ + + owner, repo_name = self.repo_name.split('/') + variables = { + "owner": owner, + "repo": repo_name, + "prNumber": pr_number + } + + try: + response = requests.post( + "https://api.github.com/graphql", + headers={"Authorization": f"token {self.token}"}, + json={"query": query, "variables": variables}, + timeout=10 + ) + response.raise_for_status() + + data = response.json() + if 'errors' in data: + self.logger.warning(f"GraphQL errors for PR #{pr_number}: {data['errors']}") + return [] + + issues = [] + nodes = data.get("data", {}).get("repository", {}).get("pullRequest", {}).get("closingIssuesReferences", {}).get("nodes", []) + for issue in nodes: + owner_name = issue["repository"]["owner"]["login"] + repo_name_issue = issue["repository"]["name"] + number = issue["number"] + if owner_name == owner and repo_name_issue == repo_name: + issues.append(f"#{number}") + else: + issues.append(f"{owner_name}/{repo_name_issue}#{number}") + + return issues + except Exception as e: + self.logger.warning(f"Failed to get linked issues via GraphQL for PR #{pr_number}: {e}") + return [] + + def _get_linked_issues(self) -> str: + """Get linked issues for all PRs""" + all_issues = [] + + for pull in self.pull_requests: + issues = self._get_linked_issues_graphql(pull.number) + + # If GraphQL didn't return issues, parse from PR body + if not issues and pull.body: + body_issues = re.findall(r'#(\d+)', pull.body) + issues = [f"#{num}" for num in body_issues] + + all_issues.extend(issues) + + # Remove duplicates + unique_issues = list(dict.fromkeys(all_issues)) + return ' '.join(unique_issues) if unique_issues else 'None' def pr_title(self, target_branch) -> str: + """Generate PR title""" if len(self.pr_title_list) == 1: - return f"{target_branch}: {self.pr_title_list[0]}" - return f"{target_branch}: cherry-pick {', '.join(self.pr_title_list)}" + title = f"[Backport {target_branch}] {self.pr_title_list[0]}" + else: + title = f"[Backport {target_branch}] cherry-pick {', '.join(self.pr_title_list)}" + + # Truncate long titles (GitHub limit ~256 characters) + if len(title) > 200: + title = title[:197] + "..." + + return title - def pr_body(self, with_wf: bool) -> str: + def pr_body(self, with_wf: bool, has_conflicts: bool = False, target_branch: Optional[str] = None) -> str: + """Generate PR body with improved format""" commits = '\n'.join(self.pr_body_list) - pr_body = f"Cherry-pick:\n{commits}\n" + + # Get linked issues + issue_refs = self._get_linked_issues() + + # Format author information + authors = ', '.join([f"@{author}" for author in set(self.pull_authors)]) if self.pull_authors else "Unknown" + + # Determine target branch for description + branch_desc = target_branch if target_branch else (', '.join(self.target_branches) if len(self.target_branches) == 1 else 'multiple branches') + + pr_body = f"""## Description +Backport to `{branch_desc}`. + +### Original PR(s) +{commits} + +### Metadata +- **Original PR author(s):** {authors} +- **Cherry-picked by:** @{self.workflow_triggerer} +- **Related issues:** {issue_refs} +""" + + if has_conflicts: + pr_body += """ +### Conflicts Require Manual Resolution + +This PR contains merge conflicts that require manual resolution. + +**How to resolve conflicts:** + +```bash +git fetch origin +git checkout +# Resolve conflicts in files +git add . +git commit --amend # or create new commit +git push +``` + +After resolving conflicts, mark this PR as ready for review. +""" + if with_wf: if self.workflow_url: - pr_body += f"\nPR was created by cherry-pick workflow [run]({self.workflow_url})\n" + pr_body += f"\n---\n\nPR was created by cherry-pick workflow [run]({self.workflow_url})" else: - pr_body += "\nPR was created by cherry-pick script\n" + pr_body += "\n---\n\nPR was created by cherry-pick script" + return pr_body def add_summary(self, msg): @@ -94,64 +414,311 @@ def add_summary(self, msg): def git_run(self, *args): args = ["git"] + list(args) - self.logger.info("run: %r", args) + self.logger.info("Executing git command: %r", args) try: output = subprocess.check_output(args).decode() except subprocess.CalledProcessError as e: - self.logger.error(e.output.decode()) + error_output = e.output.decode() if e.output else "" + stderr_output = e.stderr.decode() if e.stderr else "" + self.logger.error(f"Git command failed: {' '.join(args)}") + self.logger.error(f"Exit code: {e.returncode}") + if error_output: + self.logger.error(f"Stdout: {error_output}") + if stderr_output: + self.logger.error(f"Stderr: {stderr_output}") raise else: - self.logger.info("output:\n%s", output) + self.logger.debug("Command output:\n%s", output) return output + def _handle_cherry_pick_conflict(self, commit_sha: str): + """Handle cherry-pick conflict: commit the conflict""" + try: + # Check if there are conflicts (files with conflicts or unmerged paths) + result = subprocess.run( + ['git', 'status', '--porcelain'], + capture_output=True, + text=True, + check=True + ) + + # Also check for conflict markers + has_conflict_markers = False + if result.stdout.strip(): + # Check for conflict markers in files + status_lines = result.stdout.strip().split('\n') + for line in status_lines: + if line.startswith('UU') or line.startswith('AA') or line.startswith('DD'): + # Unmerged paths - definitely a conflict + has_conflict_markers = True + break + + if has_conflict_markers or result.stdout.strip(): + # There are changes (conflicts) + self.git_run("add", "-A") + self.git_run("commit", "-m", f"BACKPORT-CONFLICT: manual resolution required for commit {commit_sha[:7]}") + self.logger.info(f"Conflict committed for commit {commit_sha[:7]}") + return True + except Exception as e: + self.logger.error(f"Error handling conflict for commit {commit_sha[:7]}: {e}") + + return False + + def _create_initial_backport_comment(self): + """Create initial comment in original PRs about backport start""" + if not self.workflow_url: + self.logger.warning("Workflow URL not available, skipping initial comment") + return + + target_branches_str = ', '.join([f"`{b}`" for b in self.target_branches]) + initial_comment = f"Backport to {target_branches_str} in progress: [workflow run]({self.workflow_url})" + + for pull in self.pull_requests: + try: + comment = pull.create_issue_comment(initial_comment) + self.backport_comments.append((pull, comment)) + self.logger.info(f"Created initial backport comment in original PR #{pull.number}") + except GithubException as e: + self.logger.warning(f"Failed to create initial comment in original PR #{pull.number}: {e}") + + def _update_backport_comment(self): + """Update comment in original PRs with backport results""" + if not self.backport_comments: + return + + for pull, comment in self.backport_comments: + try: + total_branches = len(self.created_backport_prs) + len(self.skipped_branches) + + if total_branches == 0: + # No branches processed (should not happen) + updated_comment = f"Backport to {', '.join([f'`{b}`' for b in self.target_branches])} completed with no results" + if self.workflow_url: + updated_comment += f" - [workflow run]({self.workflow_url})" + elif total_branches == 1 and len(self.created_backport_prs) == 1: + # Single branch with PR - simple comment + target_branch, pr, has_conflicts = self.created_backport_prs[0] + status = "draft PR" if has_conflicts else "PR" + updated_comment = f"Backported to `{target_branch}`: {status} {pr.html_url}" + if has_conflicts: + updated_comment += " (contains conflicts requiring manual resolution)" + if self.workflow_url: + updated_comment += f" - [workflow run]({self.workflow_url})" + else: + # Multiple branches or mixed results - list all + updated_comment = "Backport results:\n" + + # List created PRs + for target_branch, pr, has_conflicts in self.created_backport_prs: + status = "draft PR" if has_conflicts else "PR" + conflict_note = " (contains conflicts requiring manual resolution)" if has_conflicts else "" + updated_comment += f"- `{target_branch}`: {status} {pr.html_url}{conflict_note}\n" + + # List skipped branches + for target_branch, reason in self.skipped_branches: + updated_comment += f"- `{target_branch}`: skipped ({reason})\n" + + if self.workflow_url: + updated_comment += f"\n[workflow run]({self.workflow_url})" + + comment.edit(updated_comment) + self.logger.info(f"Updated backport comment in original PR #{pull.number}") + except GithubException as e: + self.logger.warning(f"Failed to update comment in original PR #{pull.number}: {e}") + def create_pr_for_branch(self, target_branch): + """Create PR for target branch with conflict handling""" dev_branch_name = f"cherry-pick-{target_branch}-{self.dtm}" + has_conflicts = False + + # First fetch for up-to-date branch information + self.git_run("fetch", "origin", target_branch) + self.git_run("reset", "--hard") - self.git_run("checkout", target_branch) + # Create/update local branch based on origin/target_branch (-B overwrites if exists) + self.git_run("checkout", "-B", target_branch, f"origin/{target_branch}") self.git_run("checkout", "-b", dev_branch_name) - self.git_run("cherry-pick", "--allow-empty", *self.commit_shas) - self.git_run("push", "--set-upstream", "origin", dev_branch_name) - - pr = self.repo.create_pull( - target_branch, dev_branch_name, title=self.pr_title(target_branch), body=self.pr_body(True), maintainer_can_modify=True - ) + + # Filter commits that already exist in branch + commits_to_pick = [] + for commit_sha in self.commit_shas: + if self._is_commit_in_branch(commit_sha, target_branch): + self.logger.info(f"Commit {commit_sha[:7]} already exists in branch {target_branch}, skipping") + self.add_summary(f"Commit {commit_sha[:7]} already exists in branch {target_branch}, skipped") + else: + commits_to_pick.append(commit_sha) + + if not commits_to_pick: + reason = "all commits already exist in target branch" + self.logger.info(f"All commits already exist in branch {target_branch}, skipping PR creation") + self.add_summary(f"All commits already exist in branch {target_branch}, skipping PR creation") + self.skipped_branches.append((target_branch, reason)) + return + + # Cherry-pick commits + for commit_sha in commits_to_pick: + try: + self.git_run("cherry-pick", "--allow-empty", commit_sha) + except subprocess.CalledProcessError as e: + error_output = e.output.decode() if e.output else "" + # Check if this is a conflict or another error + if "CONFLICT" in error_output or "conflict" in error_output.lower(): + self.logger.warning(f"Conflict during cherry-pick of commit {commit_sha[:7]}") + if self._handle_cherry_pick_conflict(commit_sha): + has_conflicts = True + else: + # Failed to handle conflict, abort cherry-pick + self.git_run("cherry-pick", "--abort") + self.has_errors = True + error_msg = f"CHERRY_PICK_CONFLICT_ERROR: Failed to handle conflict for commit {commit_sha[:7]} in branch {target_branch}" + self.logger.error(error_msg) + self.add_summary(error_msg) + else: + # Another error + self.git_run("cherry-pick", "--abort") + raise + + # Push branch + try: + self.git_run("push", "--set-upstream", "origin", dev_branch_name) + except subprocess.CalledProcessError: + self.has_errors = True + raise + # Create PR (draft if there are conflicts) try: - pr.enable_automerge(merge_method='MERGE') - except BaseException as e: - logging.warning(f'Error while enable automerge with method MERGE: {e}') + pr = self.repo.create_pull( + base=target_branch, + head=dev_branch_name, + title=self.pr_title(target_branch), + body=self.pr_body(True, has_conflicts, target_branch), + maintainer_can_modify=True, + draft=has_conflicts + ) + except GithubException as e: + self.has_errors = True + self.logger.error(f"PR_CREATION_ERROR: Failed to create PR for branch {target_branch}: {e}") + raise + + # Assign workflow triggerer as assignee + if self.workflow_triggerer and self.workflow_triggerer != 'unknown': try: - pr.enable_automerge(merge_method='SQUASH') - except BaseException as f: - logging.warning(f'Error while enable automerge with method SQUASH: {f}') + pr.add_to_assignees(self.workflow_triggerer) + self.logger.info(f"Assigned {self.workflow_triggerer} as assignee to PR {pr.html_url}") + except GithubException as e: + self.logger.warning(f"Failed to assign {self.workflow_triggerer} as assignee to PR {pr.html_url}: {e}") + + if has_conflicts: + self.logger.info(f"Created draft PR {pr.html_url} for branch {target_branch} with conflicts") + self.add_summary(f"Branch {target_branch}: Draft PR {pr.html_url} created with conflicts") + else: + self.logger.info(f"Created PR {pr.html_url} for branch {target_branch}") + self.add_summary(f"Branch {target_branch}: PR {pr.html_url} created") - self.add_summary(f'{target_branch}: PR {pr.html_url} created') + # Enable automerge only if there are no conflicts + if not has_conflicts: + try: + pr.enable_automerge(merge_method='MERGE') + except BaseException as e: + self.logger.warning(f"AUTOMERGE_WARNING: Failed to enable automerge with method MERGE for PR {pr.html_url}: {e}") + try: + pr.enable_automerge(merge_method='SQUASH') + except BaseException as f: + self.logger.warning(f"AUTOMERGE_WARNING: Failed to enable automerge with method SQUASH for PR {pr.html_url}: {f}") + + # Store created PR for later comment + self.created_backport_prs.append((target_branch, pr, has_conflicts)) def process(self): + """Main processing method""" br = ', '.join([f'[{b}]({self.repo.html_url}/tree/{b})' for b in self.target_branches]) - self.logger.info(self.pr_title(br)) + self.logger.info(f"Starting cherry-pick process: {self.pr_title(br)}") self.add_summary(f"{self.pr_body(False)}to {br}") - if len(self.commit_shas) == 0 or len(self.target_branches) == 0: - self.add_summary("Noting to cherry-pick or no targets branches, my life is meaningless.") - return - self.git_run( - "clone", f"https://{self.token}@github.com/{self.repo_name}.git", "-c", "protocol.version=2", f"ydb-new-pr" - ) + + # Create initial comment in original PRs + self._create_initial_backport_comment() + + # Validate input data + try: + self._validate_inputs() + except ValueError as e: + error_msg = f"VALIDATION_ERROR: {e}" + self.logger.error(error_msg) + self.add_summary(error_msg) + sys.exit(1) + except Exception as e: + error_msg = f"VALIDATION_ERROR: Critical validation error: {e}" + self.logger.error(error_msg) + self.add_summary(error_msg) + sys.exit(1) + + # Clone repository + try: + self.git_run( + "clone", f"https://{self.token}@github.com/{self.repo_name}.git", "-c", "protocol.version=2", f"ydb-new-pr" + ) + except subprocess.CalledProcessError as e: + error_msg = f"REPOSITORY_CLONE_ERROR: Failed to clone repository {self.repo_name}: {e}" + self.logger.error(error_msg) + self.add_summary(error_msg) + sys.exit(1) + os.chdir(f"ydb-new-pr") + + # Process each target branch for target in self.target_branches: try: self.create_pr_for_branch(target) except GithubException as e: - self.add_summary(f'{target} error GithubException:\n```\n{e}\n```\n') + self.has_errors = True + error_msg = f"GITHUB_API_ERROR: Branch {target} - {type(e).__name__}: {e}" + self.logger.error(error_msg) + self.add_summary(f"Branch {target} error: {type(e).__name__}\n```\n{e}\n```") + # Track failed branch + reason = f"GitHub API error: {str(e)[:100]}" # Truncate long error messages + self.skipped_branches.append((target, reason)) except subprocess.CalledProcessError as e: - msg = f'`{target}` error `subprocess.CalledProcessError`: `{e}`' + self.has_errors = True + error_msg = f"GIT_COMMAND_ERROR: Branch {target} - Command failed with exit code {e.returncode}" + self.logger.error(error_msg) + if e.output: + self.logger.error(f"Command stdout: {e.output.decode()}") + if e.stderr: + self.logger.error(f"Command stderr: {e.stderr.decode()}") + summary_msg = f"Branch {target} error: subprocess.CalledProcessError (exit code {e.returncode})" if e.output: - msg += f'\nOutput:\n```\n{e.output.decode()}\n```' + summary_msg += f'\nStdout:\n```\n{e.output.decode()}\n```' if e.stderr: - msg += f'\nErrors:\n```\n{e.stderr.decode()}\n```' - self.add_summary(msg) + summary_msg += f'\nStderr:\n```\n{e.stderr.decode()}\n```' + self.add_summary(summary_msg) + # Track failed branch + error_output = e.output.decode() if e.output else "" + if "CONFLICT" in error_output or "conflict" in error_output.lower(): + reason = "cherry-pick conflict (failed to resolve)" + else: + reason = f"git command failed (exit code {e.returncode})" + self.skipped_branches.append((target, reason)) except BaseException as e: - self.add_summary(f'{target} error {type(e)}\n```\n{e}\n```') + self.has_errors = True + error_msg = f"UNEXPECTED_ERROR: Branch {target} - {type(e).__name__}: {e}" + self.logger.error(error_msg) + self.add_summary(f"Branch {target} error: {type(e).__name__}\n```\n{e}\n```") + # Track failed branch + reason = f"unexpected error: {type(e).__name__}" + self.skipped_branches.append((target, reason)) + + # Update comments in original PRs with backport results + self._update_backport_comment() + + # If there were errors, exit with error + if self.has_errors: + error_msg = "WORKFLOW_FAILED: Cherry-pick workflow completed with errors. Check logs above for details." + self.logger.error(error_msg) + self.add_summary(error_msg) + sys.exit(1) + + self.logger.info("WORKFLOW_SUCCESS: All cherry-pick operations completed successfully") + self.add_summary("All cherry-pick operations completed successfully") def main(): @@ -163,6 +730,17 @@ def main(): parser.add_argument( "--target-branches", help="List of branchs to cherry-pick. Separated by space, comma or line end." ) + parser.add_argument( + "--merge-commits", + choices=['fail', 'skip'], + default='skip', + help="How to handle merge commits inside PR: 'skip' (default) - skip merge commits, 'fail' - fail on merge commits" + ) + parser.add_argument( + "--allow-unmerged", + action='store_true', + help="Allow backporting unmerged PRs (uses commits from PR directly, not merge_commit_sha)" + ) args = parser.parse_args() log_fmt = "%(asctime)s - %(levelname)s - %(name)s - %(message)s" diff --git a/.github/workflows/cherry_pick.yml b/.github/workflows/cherry_pick.yml index 59f75582ed03..abac745405b3 100644 --- a/.github/workflows/cherry_pick.yml +++ b/.github/workflows/cherry_pick.yml @@ -39,7 +39,7 @@ jobs: - name: install packages shell: bash run: | - pip install PyGithub==2.5.0 + pip install PyGithub==2.5.0 requests - name: configure shell: bash @@ -54,6 +54,8 @@ jobs: REPO: ${{ github.repository }} TOKEN: ${{ env.GH_TOKEN }} GITHUB_STEP_SUMMARY: ${{ env.GITHUB_STEP_SUMMARY }} + GITHUB_ACTOR: ${{ github.actor }} + GITHUB_RUN_ID: ${{ github.run_id }} run: | ./.github/scripts/cherry_pick.py \ --commits="${{ inputs.commits }}" \ From 109c4ce17b7b1a3d85bffb66294a9b3f4574df63 Mon Sep 17 00:00:00 2001 From: naspirato Date: Tue, 18 Nov 2025 12:55:18 +0100 Subject: [PATCH 02/24] Enhance cherry-pick script with changelog extraction and improved PR body formatting - Added methods to extract changelog category, entry, and description from PR body. - Improved PR body generation to include extracted changelog information and ensure validation requirements are met. - Updated workflow configuration to support new changelog features. --- .github/scripts/cherry_pick.py | 172 +++++++++++++++++++++++++++--- .github/workflows/cherry_pick.yml | 7 +- 2 files changed, 162 insertions(+), 17 deletions(-) diff --git a/.github/scripts/cherry_pick.py b/.github/scripts/cherry_pick.py index d5001cbf94de..3f6413fc1fa4 100755 --- a/.github/scripts/cherry_pick.py +++ b/.github/scripts/cherry_pick.py @@ -338,6 +338,85 @@ def _get_linked_issues(self) -> str: unique_issues = list(dict.fromkeys(all_issues)) return ' '.join(unique_issues) if unique_issues else 'None' + def _extract_changelog_category(self, pr_body: str) -> Optional[str]: + """Extract Changelog category from PR body""" + if not pr_body: + return None + + # Match the category section + category_match = re.search(r"### Changelog category.*?\n(.*?)(\n###|$)", pr_body, re.DOTALL) + if not category_match: + return None + + # Extract categories (lines starting with *) + categories = [line.strip('* ').strip() for line in category_match.group(1).splitlines() if line.strip() and line.strip().startswith('*')] + + # Find the selected category - take the first non-empty category line + # The selected category is typically the one that's not commented out + for cat in categories: + cat_clean = cat.strip('* ').strip() + if cat_clean: + # Return the category as-is, without hardcoded validation + return cat_clean + + return None + + def _extract_changelog_entry(self, pr_body: str) -> Optional[str]: + """Extract Changelog entry from PR body""" + if not pr_body: + return None + + entry_match = re.search(r"### Changelog entry.*?\n(.*?)(\n###|$)", pr_body, re.DOTALL) + if not entry_match: + return None + + entry = entry_match.group(1).strip() + # Skip if it's just "..." or empty + if entry in ['...', '']: + return None + + return entry + + def _extract_description_for_reviewers(self, pr_body: str) -> Optional[str]: + """Extract Description for reviewers section from PR body""" + if not pr_body: + return None + + desc_match = re.search(r"### Description for reviewers.*?\n(.*?)(\n###|$)", pr_body, re.DOTALL) + if not desc_match: + return None + + desc = desc_match.group(1).strip() + # Skip if it's just "..." or empty + if desc in ['...', '']: + return None + + return desc + + def _get_changelog_info(self) -> Tuple[Optional[str], Optional[str], Optional[str]]: + """Get Changelog category, entry, and description from original PRs""" + category = None + entry = None + description = None + + # Try to get from first PR (usually there's one main PR) + for pull in self.pull_requests: + if pull.body: + cat = self._extract_changelog_category(pull.body) + if cat: + category = cat + ent = self._extract_changelog_entry(pull.body) + if ent: + entry = ent + desc = self._extract_description_for_reviewers(pull.body) + if desc: + description = desc + # If we found all, we're done + if category and entry and description: + break + + return category, entry, description + def pr_title(self, target_branch) -> str: """Generate PR title""" if len(self.pr_title_list) == 1: @@ -352,7 +431,7 @@ def pr_title(self, target_branch) -> str: return title def pr_body(self, with_wf: bool, has_conflicts: bool = False, target_branch: Optional[str] = None) -> str: - """Generate PR body with improved format""" + """Generate PR body with improved format that passes validation""" commits = '\n'.join(self.pr_body_list) # Get linked issues @@ -364,21 +443,67 @@ def pr_body(self, with_wf: bool, has_conflicts: bool = False, target_branch: Opt # Determine target branch for description branch_desc = target_branch if target_branch else (', '.join(self.target_branches) if len(self.target_branches) == 1 else 'multiple branches') - pr_body = f"""## Description -Backport to `{branch_desc}`. - -### Original PR(s) -{commits} - -### Metadata -- **Original PR author(s):** {authors} -- **Cherry-picked by:** @{self.workflow_triggerer} -- **Related issues:** {issue_refs} -""" + # Get Changelog category, entry, and description from original PR + changelog_category, changelog_entry, original_description = self._get_changelog_info() + # Build changelog entry if not found + if not changelog_entry: + if len(self.pull_requests) == 1: + pr_num = self.pull_requests[0].number + changelog_entry = f"Backport of PR #{pr_num} to `{branch_desc}`" + else: + pr_nums = ', '.join([f"#{p.number}" for p in self.pull_requests]) + changelog_entry = f"Backport of PRs {pr_nums} to `{branch_desc}`" + + # Ensure entry is at least 20 characters (validation requirement) + if len(changelog_entry) < 20: + changelog_entry = f"Backport to `{branch_desc}`: {changelog_entry}" + + # For Bugfix category, ensure there's an issue reference in changelog entry (validation requirement) + if changelog_category and changelog_category.startswith("Bugfix") and issue_refs and issue_refs != "None": + # Check if issue reference is already in entry + issue_patterns = [ + r"https://github.com/ydb-platform/[a-z\-]+/issues/\d+", + r"https://st.yandex-team.ru/[a-zA-Z]+-\d+", + r"#\d+", + r"[a-zA-Z]+-\d+" + ] + has_issue = any(re.search(pattern, changelog_entry) for pattern in issue_patterns) + if not has_issue: + # Add issue reference to entry + changelog_entry = f"{changelog_entry} ({issue_refs})" + + # Build category section - use template if category not found + if changelog_category: + category_section = f"* {changelog_category}" + else: + # Use full template if category not found - let user choose + category_section = """* New feature +* Experimental feature +* Improvement +* Performance improvement +* User Interface +* Bugfix +* Backward incompatible change +* Documentation (changelog entry is not required) +* Not for changelog (changelog entry is not required)""" + + # Build description for reviewers section + description_section = f"Backport to `{branch_desc}`.\n\n" + description_section += f"#### Original PR(s)\n{commits}\n\n" + description_section += f"#### Metadata\n" + description_section += f"- **Original PR author(s):** {authors}\n" + description_section += f"- **Cherry-picked by:** @{self.workflow_triggerer}\n" + description_section += f"- **Related issues:** {issue_refs}" + + # If original PR had description, append it + if original_description: + description_section += f"\n\n#### Original Description\n{original_description}" + + # Add conflicts section if needed if has_conflicts: - pr_body += """ -### Conflicts Require Manual Resolution + description_section += """ +#### Conflicts Require Manual Resolution This PR contains merge conflicts that require manual resolution. @@ -396,11 +521,26 @@ def pr_body(self, with_wf: bool, has_conflicts: bool = False, target_branch: Opt After resolving conflicts, mark this PR as ready for review. """ + # Add workflow link if needed if with_wf: if self.workflow_url: - pr_body += f"\n---\n\nPR was created by cherry-pick workflow [run]({self.workflow_url})" + description_section += f"\n\n---\n\nPR was created by cherry-pick workflow [run]({self.workflow_url})" else: - pr_body += "\n---\n\nPR was created by cherry-pick script" + description_section += "\n\n---\n\nPR was created by cherry-pick script" + + # Build PR body according to template (Changelog entry and category must come first) + pr_body = f"""### Changelog entry + +{changelog_entry} + +### Changelog category + +{category_section} + +### Description for reviewers + +{description_section} +""" return pr_body diff --git a/.github/workflows/cherry_pick.yml b/.github/workflows/cherry_pick.yml index abac745405b3..0f06b7082402 100644 --- a/.github/workflows/cherry_pick.yml +++ b/.github/workflows/cherry_pick.yml @@ -19,6 +19,10 @@ on: default: "" description: Comma or space separated branches to cherry-pick required: true + allow_unmerged: + type: boolean + default: true + description: Allow backporting unmerged PRs (uses commits from PR directly) concurrency: group: ${{ github.workflow }} @@ -59,4 +63,5 @@ jobs: run: | ./.github/scripts/cherry_pick.py \ --commits="${{ inputs.commits }}" \ - --target-branches="${{ inputs.target_branches }}" + --target-branches="${{ inputs.target_branches }}" \ + ${{ inputs.allow_unmerged && '--allow-unmerged' || '' }} From 57d548eabb2fd640fbc56c6c65fc56c1995bbbe0 Mon Sep 17 00:00:00 2001 From: naspirato Date: Tue, 18 Nov 2025 13:07:11 +0100 Subject: [PATCH 03/24] Enhance cherry-pick script to avoid branch name conflicts and improve backport comment handling - Updated datetime formatting to include seconds, preventing branch name conflicts. - Added method to find and update existing backport comments in original PRs, enhancing comment management. - Improved logging for comment creation and updates to provide clearer feedback on operations. --- .github/scripts/cherry_pick.py | 37 ++++++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/.github/scripts/cherry_pick.py b/.github/scripts/cherry_pick.py index 3f6413fc1fa4..317c5356f6d9 100755 --- a/.github/scripts/cherry_pick.py +++ b/.github/scripts/cherry_pick.py @@ -41,7 +41,8 @@ def __split(s: str, seps: str = ', \n'): self.skipped_branches = [] # Store branches where PR was not created: [(target_branch, reason), ...] self.backport_comments = [] # Store comment objects for editing: [(pull, comment), ...] - self.dtm = datetime.datetime.now().strftime("%y%m%d-%H%M") + # Use datetime with seconds to avoid branch name conflicts + self.dtm = datetime.datetime.now().strftime("%y%m%d-%H%M%S") self.logger = logging.getLogger("cherry-pick") # Get workflow run URL run_id = os.getenv('GITHUB_RUN_ID') @@ -604,8 +605,23 @@ def _handle_cherry_pick_conflict(self, commit_sha: str): return False + def _find_existing_backport_comment(self, pull): + """Find existing backport comment in PR (created by previous workflow run)""" + try: + # Get all comments for this PR + comments = pull.get_issue_comments() + for comment in comments: + # Check if comment is from YDBot and contains "Backport" keyword + if comment.user.login == "YDBot" and "Backport" in comment.body: + # Check if it's an initial comment (contains "in progress") or a result comment + if "in progress" in comment.body or "Backport" in comment.body: + return comment + except Exception as e: + self.logger.debug(f"Failed to find existing comment in PR #{pull.number}: {e}") + return None + def _create_initial_backport_comment(self): - """Create initial comment in original PRs about backport start""" + """Create or update initial comment in original PRs about backport start""" if not self.workflow_url: self.logger.warning("Workflow URL not available, skipping initial comment") return @@ -615,11 +631,20 @@ def _create_initial_backport_comment(self): for pull in self.pull_requests: try: - comment = pull.create_issue_comment(initial_comment) - self.backport_comments.append((pull, comment)) - self.logger.info(f"Created initial backport comment in original PR #{pull.number}") + # Try to find existing comment + existing_comment = self._find_existing_backport_comment(pull) + if existing_comment: + # Update existing comment + existing_comment.edit(initial_comment) + self.backport_comments.append((pull, existing_comment)) + self.logger.info(f"Updated existing backport comment in original PR #{pull.number}") + else: + # Create new comment + comment = pull.create_issue_comment(initial_comment) + self.backport_comments.append((pull, comment)) + self.logger.info(f"Created initial backport comment in original PR #{pull.number}") except GithubException as e: - self.logger.warning(f"Failed to create initial comment in original PR #{pull.number}: {e}") + self.logger.warning(f"Failed to create/update initial comment in original PR #{pull.number}: {e}") def _update_backport_comment(self): """Update comment in original PRs with backport results""" From 7c39046f81cda76c3c53b8704d7e5defce648f08 Mon Sep 17 00:00:00 2001 From: naspirato Date: Tue, 18 Nov 2025 14:16:29 +0100 Subject: [PATCH 04/24] Enhance cherry-pick script to include detailed conflict resolution information - Updated the `created_backport_prs` structure to store conflicted files alongside target branches and PRs. - Improved the PR body generation to include a section for files with conflicts, providing links for easier resolution. - Enhanced conflict handling to return a list of conflicted files, improving logging and user feedback during cherry-pick operations. --- .github/scripts/cherry_pick.py | 79 ++++++++++++++++++++++++++-------- 1 file changed, 61 insertions(+), 18 deletions(-) diff --git a/.github/scripts/cherry_pick.py b/.github/scripts/cherry_pick.py index 317c5356f6d9..ed4e4d06f011 100755 --- a/.github/scripts/cherry_pick.py +++ b/.github/scripts/cherry_pick.py @@ -37,7 +37,7 @@ def __split(s: str, seps: str = ', \n'): self.has_errors = False self.merge_commits_mode = getattr(args, 'merge_commits', 'skip') # fail or skip self.allow_unmerged = getattr(args, 'allow_unmerged', False) # Allow backporting unmerged PRs - self.created_backport_prs = [] # Store created backport PRs: [(target_branch, pr, has_conflicts), ...] + self.created_backport_prs = [] # Store created backport PRs: [(target_branch, pr, has_conflicts, conflict_files), ...] self.skipped_branches = [] # Store branches where PR was not created: [(target_branch, reason), ...] self.backport_comments = [] # Store comment objects for editing: [(pull, comment), ...] @@ -431,7 +431,7 @@ def pr_title(self, target_branch) -> str: return title - def pr_body(self, with_wf: bool, has_conflicts: bool = False, target_branch: Optional[str] = None) -> str: + def pr_body(self, with_wf: bool, has_conflicts: bool = False, target_branch: Optional[str] = None, dev_branch_name: Optional[str] = None, conflict_files: Optional[List[str]] = None) -> str: """Generate PR body with improved format that passes validation""" commits = '\n'.join(self.pr_body_list) @@ -490,8 +490,7 @@ def pr_body(self, with_wf: bool, has_conflicts: bool = False, target_branch: Opt * Not for changelog (changelog entry is not required)""" # Build description for reviewers section - description_section = f"Backport to `{branch_desc}`.\n\n" - description_section += f"#### Original PR(s)\n{commits}\n\n" + description_section = f"#### Original PR(s)\n{commits}\n\n" description_section += f"#### Metadata\n" description_section += f"- **Original PR author(s):** {authors}\n" description_section += f"- **Cherry-picked by:** @{self.workflow_triggerer}\n" @@ -503,19 +502,32 @@ def pr_body(self, with_wf: bool, has_conflicts: bool = False, target_branch: Opt # Add conflicts section if needed if has_conflicts: - description_section += """ + # Use dev_branch_name if provided, otherwise use target_branch as fallback + branch_for_instructions = dev_branch_name if dev_branch_name else (target_branch if target_branch else "") + + # Build conflicted files list with links + conflicted_files_section = "" + if conflict_files: + conflicted_files_section = "\n\n**Files with conflicts:**\n" + for file_path in conflict_files: + # Create link to file in GitHub branch (will show conflict markers) + # Format: https://github.com/{repo}/blob/{branch}/{file_path} + file_link = f"https://github.com/{self.repo_name}/blob/{branch_for_instructions}/{file_path}" + conflicted_files_section += f"- [{file_path}]({file_link})\n" + + description_section += f""" #### Conflicts Require Manual Resolution -This PR contains merge conflicts that require manual resolution. +This PR contains merge conflicts that require manual resolution.{conflicted_files_section} **How to resolve conflicts:** ```bash -git fetch origin -git checkout +git fetch origin +git checkout --track origin/{branch_for_instructions} # Resolve conflicts in files git add . -git commit --amend # or create new commit +git commit -m "Resolved merge conflicts" git push ``` @@ -573,7 +585,8 @@ def git_run(self, *args): return output def _handle_cherry_pick_conflict(self, commit_sha: str): - """Handle cherry-pick conflict: commit the conflict""" + """Handle cherry-pick conflict: commit the conflict and return list of conflicted files""" + conflict_files = [] try: # Check if there are conflicts (files with conflicts or unmerged paths) result = subprocess.run( @@ -592,18 +605,42 @@ def _handle_cherry_pick_conflict(self, commit_sha: str): if line.startswith('UU') or line.startswith('AA') or line.startswith('DD'): # Unmerged paths - definitely a conflict has_conflict_markers = True - break + # Extract filename (after status code) + # Format: "XY filename" where XY is 2-char status, then space, then filename + # More robust: split by whitespace and take everything after status code + parts = line.split(None, 1) # Split on whitespace, max 1 split + if len(parts) > 1: + file_path = parts[1].strip() + if file_path: + conflict_files.append(file_path) if has_conflict_markers or result.stdout.strip(): + # If we didn't find unmerged paths, try to find files with conflict markers + if not conflict_files: + # Use git diff to find files with conflict markers + try: + diff_result = subprocess.run( + ['git', 'diff', '--name-only', '--diff-filter=U'], + capture_output=True, + text=True, + check=True + ) + if diff_result.stdout.strip(): + conflict_files = [f.strip() for f in diff_result.stdout.strip().split('\n') if f.strip()] + except subprocess.CalledProcessError: + pass + # There are changes (conflicts) self.git_run("add", "-A") self.git_run("commit", "-m", f"BACKPORT-CONFLICT: manual resolution required for commit {commit_sha[:7]}") self.logger.info(f"Conflict committed for commit {commit_sha[:7]}") - return True + if conflict_files: + self.logger.info(f"Conflicted files: {', '.join(conflict_files)}") + return True, conflict_files except Exception as e: self.logger.error(f"Error handling conflict for commit {commit_sha[:7]}: {e}") - return False + return False, [] def _find_existing_backport_comment(self, pull): """Find existing backport comment in PR (created by previous workflow run)""" @@ -662,7 +699,7 @@ def _update_backport_comment(self): updated_comment += f" - [workflow run]({self.workflow_url})" elif total_branches == 1 and len(self.created_backport_prs) == 1: # Single branch with PR - simple comment - target_branch, pr, has_conflicts = self.created_backport_prs[0] + target_branch, pr, has_conflicts, conflict_files = self.created_backport_prs[0] status = "draft PR" if has_conflicts else "PR" updated_comment = f"Backported to `{target_branch}`: {status} {pr.html_url}" if has_conflicts: @@ -674,7 +711,7 @@ def _update_backport_comment(self): updated_comment = "Backport results:\n" # List created PRs - for target_branch, pr, has_conflicts in self.created_backport_prs: + for target_branch, pr, has_conflicts, conflict_files in self.created_backport_prs: status = "draft PR" if has_conflicts else "PR" conflict_note = " (contains conflicts requiring manual resolution)" if has_conflicts else "" updated_comment += f"- `{target_branch}`: {status} {pr.html_url}{conflict_note}\n" @@ -695,6 +732,7 @@ def create_pr_for_branch(self, target_branch): """Create PR for target branch with conflict handling""" dev_branch_name = f"cherry-pick-{target_branch}-{self.dtm}" has_conflicts = False + all_conflict_files = [] # Collect all conflicted files across all commits # First fetch for up-to-date branch information self.git_run("fetch", "origin", target_branch) @@ -729,8 +767,13 @@ def create_pr_for_branch(self, target_branch): # Check if this is a conflict or another error if "CONFLICT" in error_output or "conflict" in error_output.lower(): self.logger.warning(f"Conflict during cherry-pick of commit {commit_sha[:7]}") - if self._handle_cherry_pick_conflict(commit_sha): + conflict_handled, conflict_files = self._handle_cherry_pick_conflict(commit_sha) + if conflict_handled: has_conflicts = True + # Add conflicted files to the list (avoid duplicates) + for file_path in conflict_files: + if file_path not in all_conflict_files: + all_conflict_files.append(file_path) else: # Failed to handle conflict, abort cherry-pick self.git_run("cherry-pick", "--abort") @@ -756,7 +799,7 @@ def create_pr_for_branch(self, target_branch): base=target_branch, head=dev_branch_name, title=self.pr_title(target_branch), - body=self.pr_body(True, has_conflicts, target_branch), + body=self.pr_body(True, has_conflicts, target_branch, dev_branch_name, all_conflict_files if all_conflict_files else None), maintainer_can_modify=True, draft=has_conflicts ) @@ -792,7 +835,7 @@ def create_pr_for_branch(self, target_branch): self.logger.warning(f"AUTOMERGE_WARNING: Failed to enable automerge with method SQUASH for PR {pr.html_url}: {f}") # Store created PR for later comment - self.created_backport_prs.append((target_branch, pr, has_conflicts)) + self.created_backport_prs.append((target_branch, pr, has_conflicts, all_conflict_files)) def process(self): """Main processing method""" From 4c9fad3b1ffbd60c90e2fbdb055029d3922a1470 Mon Sep 17 00:00:00 2001 From: naspirato Date: Tue, 18 Nov 2025 14:20:32 +0100 Subject: [PATCH 05/24] Add PR template and category definitions for validation - Introduced a new `pr_template.py` file containing PR template and issue reference patterns for the YDB project. - Updated `validate_pr_description.py` to utilize the new template and patterns, ensuring consistency in PR descriptions. - Enhanced `cherry_pick.py` to import category definitions and streamline changelog category handling. --- .../validate_pr_description/pr_template.py | 60 +++++++++++++++++++ .../validate_pr_description.py | 60 ++++--------------- .github/scripts/cherry_pick.py | 27 ++++----- 3 files changed, 84 insertions(+), 63 deletions(-) create mode 100644 .github/actions/validate_pr_description/pr_template.py diff --git a/.github/actions/validate_pr_description/pr_template.py b/.github/actions/validate_pr_description/pr_template.py new file mode 100644 index 000000000000..0ffb87e9da00 --- /dev/null +++ b/.github/actions/validate_pr_description/pr_template.py @@ -0,0 +1,60 @@ +""" +PR template and categories definitions for YDB project. +Used by both validate_pr_description.py and cherry_pick.py to ensure consistency. +""" + +# Issue reference patterns for validation +ISSUE_PATTERNS = [ + r"https://github.com/ydb-platform/[a-z\-]+/issues/\d+", + r"https://st.yandex-team.ru/[a-zA-Z]+-\d+", + r"#\d+", + r"[a-zA-Z]+-\d+" +] + +# Full PR template +PULL_REQUEST_TEMPLATE = """### Changelog entry + +... + +### Changelog category + +* New feature +* Experimental feature +* Improvement +* Performance improvement +* User Interface +* Bugfix +* Backward incompatible change +* Documentation (changelog entry is not required) +* Not for changelog (changelog entry is not required)""" + +# Categories that require changelog entry +FOR_CHANGELOG_CATEGORIES = [ + "New feature", + "Experimental feature", + "User Interface", + "Improvement", + "Performance improvement", + "Bugfix", + "Backward incompatible change" +] + +# Categories that don't require changelog entry +NOT_FOR_CHANGELOG_CATEGORIES = [ + "Documentation (changelog entry is not required)", + "Not for changelog (changelog entry is not required)" +] + +# All valid categories +ALL_CATEGORIES = FOR_CHANGELOG_CATEGORIES + NOT_FOR_CHANGELOG_CATEGORIES + + +def get_category_section_template() -> str: + """Get the category section template as a string (for cherry_pick.py)""" + return "\n".join([f"* {cat}" for cat in ALL_CATEGORIES]) + + +def get_category_section_for_selected(category: str) -> str: + """Get category section with only selected category marked""" + return f"* {category}" + diff --git a/.github/actions/validate_pr_description/validate_pr_description.py b/.github/actions/validate_pr_description/validate_pr_description.py index 8996a0d552dd..bbc00af3c5fe 100644 --- a/.github/actions/validate_pr_description/validate_pr_description.py +++ b/.github/actions/validate_pr_description/validate_pr_description.py @@ -1,31 +1,13 @@ import sys import re from typing import Tuple - -issue_patterns = [ - r"https://github.com/ydb-platform/[a-z\-]+/issues/\d+", - r"https://st.yandex-team.ru/[a-zA-Z]+-\d+", - r"#\d+", - r"[a-zA-Z]+-\d+" -] - -pull_request_template = """ -### Changelog entry - -... - -### Changelog category - -* New feature -* Experimental feature -* Improvement -* Performance improvement -* User Interface -* Bugfix -* Backward incompatible change -* Documentation (changelog entry is not required) -* Not for changelog (changelog entry is not required) -""" +from pr_template import ( + ISSUE_PATTERNS, + PULL_REQUEST_TEMPLATE, + FOR_CHANGELOG_CATEGORIES, + NOT_FOR_CHANGELOG_CATEGORIES, + ALL_CATEGORIES +) def validate_pr_description(description, is_not_for_cl_valid=True) -> bool: try: @@ -44,7 +26,7 @@ def check_pr_description(description, is_not_for_cl_valid=True) -> Tuple[bool, s if "### Changelog category" not in description and "### Changelog entry" not in description: return is_not_for_cl_valid, "Changelog category and entry sections are not found." - if pull_request_template.strip() in description.strip(): + if PULL_REQUEST_TEMPLATE.strip() in description.strip(): return is_not_for_cl_valid, "Pull request template as is." # Extract changelog category section @@ -62,34 +44,18 @@ def check_pr_description(description, is_not_for_cl_valid=True) -> Tuple[bool, s return False, txt category = categories[0] - for_cl_categories = [ - "New feature", - "Experimental feature", - "User Interface", - "Improvement", - "Performance improvement", - "Bugfix", - "Backward incompatible change" - ] - - not_for_cl_categories = [ - "Documentation (changelog entry is not required)", - "Not for changelog (changelog entry is not required)" - ] - - valid_categories = for_cl_categories + not_for_cl_categories - - if not any(cat.startswith(category) for cat in valid_categories): + + if not any(cat.startswith(category) for cat in ALL_CATEGORIES): txt = f"Invalid Changelog category: {category}" print(f"::warning::{txt}") return False, txt - if not is_not_for_cl_valid and any(cat.startswith(category) for cat in not_for_cl_categories): + if not is_not_for_cl_valid and any(cat.startswith(category) for cat in NOT_FOR_CHANGELOG_CATEGORIES): txt = f"Category is not for changelog: {category}" print(f"::notice::{txt}") return False, txt - if not any(cat.startswith(category) for cat in not_for_cl_categories): + if not any(cat.startswith(category) for cat in NOT_FOR_CHANGELOG_CATEGORIES): entry_section = re.search(r"### Changelog entry.*?\n(.*?)(\n###|$)", description, re.DOTALL) if not entry_section or len(entry_section.group(1).strip()) < 20: txt = "The changelog entry is less than 20 characters or missing." @@ -100,7 +66,7 @@ def check_pr_description(description, is_not_for_cl_valid=True) -> Tuple[bool, s def check_issue_pattern(issue_pattern): return re.search(issue_pattern, description) - if not any(check_issue_pattern(issue_pattern) for issue_pattern in issue_patterns): + if not any(check_issue_pattern(issue_pattern) for issue_pattern in ISSUE_PATTERNS): txt = "Bugfix requires a linked issue in the changelog entry" print(f"::warning::{txt}") return False, txt diff --git a/.github/scripts/cherry_pick.py b/.github/scripts/cherry_pick.py index ed4e4d06f011..4b4dccc391ff 100755 --- a/.github/scripts/cherry_pick.py +++ b/.github/scripts/cherry_pick.py @@ -10,6 +10,15 @@ from github import Github, GithubException, GithubObject, Commit import requests +# Import PR template and categories from validate_pr_description action +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'actions', 'validate_pr_description')) +from pr_template import ( + ISSUE_PATTERNS, + FOR_CHANGELOG_CATEGORIES, + NOT_FOR_CHANGELOG_CATEGORIES, + get_category_section_template +) + class CherryPickCreator: def __init__(self, args): @@ -463,13 +472,7 @@ def pr_body(self, with_wf: bool, has_conflicts: bool = False, target_branch: Opt # For Bugfix category, ensure there's an issue reference in changelog entry (validation requirement) if changelog_category and changelog_category.startswith("Bugfix") and issue_refs and issue_refs != "None": # Check if issue reference is already in entry - issue_patterns = [ - r"https://github.com/ydb-platform/[a-z\-]+/issues/\d+", - r"https://st.yandex-team.ru/[a-zA-Z]+-\d+", - r"#\d+", - r"[a-zA-Z]+-\d+" - ] - has_issue = any(re.search(pattern, changelog_entry) for pattern in issue_patterns) + has_issue = any(re.search(pattern, changelog_entry) for pattern in ISSUE_PATTERNS) if not has_issue: # Add issue reference to entry changelog_entry = f"{changelog_entry} ({issue_refs})" @@ -479,15 +482,7 @@ def pr_body(self, with_wf: bool, has_conflicts: bool = False, target_branch: Opt category_section = f"* {changelog_category}" else: # Use full template if category not found - let user choose - category_section = """* New feature -* Experimental feature -* Improvement -* Performance improvement -* User Interface -* Bugfix -* Backward incompatible change -* Documentation (changelog entry is not required) -* Not for changelog (changelog entry is not required)""" + category_section = get_category_section_template() # Build description for reviewers section description_section = f"#### Original PR(s)\n{commits}\n\n" From 6abc803262cefbb9c1e6e8ae289d717f92eddfb1 Mon Sep 17 00:00:00 2001 From: naspirato Date: Tue, 18 Nov 2025 15:19:10 +0100 Subject: [PATCH 06/24] Enhance cherry-pick script with improved error handling and conflict resolution - Added detailed error handling for the `git rev-parse` command to ensure the repository is cloned before execution. - Updated exception handling to specify the type of error encountered during PR fetching and automerge attempts. - Improved conflict detection logic to ensure all changes are captured and logged appropriately. - Enhanced logging for conflict resolution, providing clearer feedback on the status of conflicted files. --- .github/scripts/cherry_pick.py | 89 +++++++++++++++++++--------------- 1 file changed, 49 insertions(+), 40 deletions(-) diff --git a/.github/scripts/cherry_pick.py b/.github/scripts/cherry_pick.py index 4b4dccc391ff..8989525a89a3 100755 --- a/.github/scripts/cherry_pick.py +++ b/.github/scripts/cherry_pick.py @@ -90,13 +90,18 @@ def _expand_short_sha(self, short_sha: str) -> str: self.logger.debug(f"Failed to find commit via GitHub API: {e}") # If GitHub API didn't find it, use git rev-parse (after cloning) - result = subprocess.run( - ['git', 'rev-parse', short_sha], - capture_output=True, - text=True, - check=True - ) - return result.stdout.strip() + # Note: This requires the repository to be cloned first (done in process() method) + try: + result = subprocess.run( + ['git', 'rev-parse', short_sha], + capture_output=True, + text=True, + check=True + ) + return result.stdout.strip() + except (subprocess.CalledProcessError, FileNotFoundError) as e: + # Repository not cloned yet or git command failed + raise ValueError(f"Failed to expand SHA {short_sha} via git rev-parse. Repository must be cloned first. Error: {e}") def __add_commit(self, c: str, single: bool): """Add commit by SHA (supports short SHA)""" @@ -144,7 +149,7 @@ def __add_commit(self, c: str, single: bool): commit_author = commit.author.login if commit.author else None if commit_author and commit_author not in self.pull_authors: self.pull_authors.append(commit_author) - except: + except AttributeError: pass if single: self.pr_title_list.append(f'cherry-pick commit {commit.sha[:7]}') @@ -218,9 +223,9 @@ def _validate_prs(self): # Validate PRs from self.pull_requests (already loaded) for pull in self.pull_requests: try: - pull = self.repo.get_pull(pull.number) - if not pull.merged and not self.allow_unmerged: - raise ValueError(f"PR #{pull.number} is not merged. Use --allow-unmerged to allow backporting unmerged PRs") + fetched_pull = self.repo.get_pull(pull.number) + if not fetched_pull.merged and not self.allow_unmerged: + raise ValueError(f"PR #{fetched_pull.number} is not merged. Use --allow-unmerged to allow backporting unmerged PRs") except GithubException as e: raise ValueError(f"PR #{pull.number} does not exist or is not accessible: {e}") @@ -263,10 +268,12 @@ def _is_commit_in_branch(self, commit_sha: str, branch_name: str) -> bool: self.git_run("fetch", "origin", branch_name) # Check via merge-base + # Note: --is-ancestor returns 0 if commit is ancestor, 1 if not, so non-zero is expected for "not in branch" result = subprocess.run( ['git', 'merge-base', '--is-ancestor', commit_sha, f'origin/{branch_name}'], capture_output=True, - text=True + text=True, + check=False # Don't raise on non-zero exit (expected for "not ancestor") ) return result.returncode == 0 except Exception as e: @@ -364,7 +371,7 @@ def _extract_changelog_category(self, pr_body: str) -> Optional[str]: # Find the selected category - take the first non-empty category line # The selected category is typically the one that's not commented out for cat in categories: - cat_clean = cat.strip('* ').strip() + cat_clean = cat.strip() if cat_clean: # Return the category as-is, without hardcoded validation return cat_clean @@ -451,7 +458,7 @@ def pr_body(self, with_wf: bool, has_conflicts: bool = False, target_branch: Opt authors = ', '.join([f"@{author}" for author in set(self.pull_authors)]) if self.pull_authors else "Unknown" # Determine target branch for description - branch_desc = target_branch if target_branch else (', '.join(self.target_branches) if len(self.target_branches) == 1 else 'multiple branches') + branch_desc = target_branch if target_branch else ', '.join(self.target_branches) # Get Changelog category, entry, and description from original PR changelog_category, changelog_entry, original_description = self._get_changelog_info() @@ -609,29 +616,28 @@ def _handle_cherry_pick_conflict(self, commit_sha: str): if file_path: conflict_files.append(file_path) - if has_conflict_markers or result.stdout.strip(): - # If we didn't find unmerged paths, try to find files with conflict markers - if not conflict_files: - # Use git diff to find files with conflict markers - try: - diff_result = subprocess.run( - ['git', 'diff', '--name-only', '--diff-filter=U'], - capture_output=True, - text=True, - check=True - ) - if diff_result.stdout.strip(): - conflict_files = [f.strip() for f in diff_result.stdout.strip().split('\n') if f.strip()] - except subprocess.CalledProcessError: - pass - - # There are changes (conflicts) - self.git_run("add", "-A") - self.git_run("commit", "-m", f"BACKPORT-CONFLICT: manual resolution required for commit {commit_sha[:7]}") - self.logger.info(f"Conflict committed for commit {commit_sha[:7]}") - if conflict_files: - self.logger.info(f"Conflicted files: {', '.join(conflict_files)}") - return True, conflict_files + # If we didn't find unmerged paths, try to find files with conflict markers + if not conflict_files: + # Use git diff to find files with conflict markers + try: + diff_result = subprocess.run( + ['git', 'diff', '--name-only', '--diff-filter=U'], + capture_output=True, + text=True, + check=True + ) + if diff_result.stdout.strip(): + conflict_files = [f.strip() for f in diff_result.stdout.strip().split('\n') if f.strip()] + except subprocess.CalledProcessError: + pass + + # There are changes (conflicts or other modifications) + self.git_run("add", "-A") + self.git_run("commit", "-m", f"BACKPORT-CONFLICT: manual resolution required for commit {commit_sha[:7]}") + self.logger.info(f"Conflict committed for commit {commit_sha[:7]}") + if conflict_files: + self.logger.info(f"Conflicted files: {', '.join(conflict_files)}") + return True, conflict_files except Exception as e: self.logger.error(f"Error handling conflict for commit {commit_sha[:7]}: {e}") @@ -776,6 +782,9 @@ def create_pr_for_branch(self, target_branch): error_msg = f"CHERRY_PICK_CONFLICT_ERROR: Failed to handle conflict for commit {commit_sha[:7]} in branch {target_branch}" self.logger.error(error_msg) self.add_summary(error_msg) + reason = f"cherry-pick conflict (failed to resolve)" + self.skipped_branches.append((target_branch, reason)) + return else: # Another error self.git_run("cherry-pick", "--abort") @@ -822,11 +831,11 @@ def create_pr_for_branch(self, target_branch): if not has_conflicts: try: pr.enable_automerge(merge_method='MERGE') - except BaseException as e: + except Exception as e: self.logger.warning(f"AUTOMERGE_WARNING: Failed to enable automerge with method MERGE for PR {pr.html_url}: {e}") try: pr.enable_automerge(merge_method='SQUASH') - except BaseException as f: + except Exception as f: self.logger.warning(f"AUTOMERGE_WARNING: Failed to enable automerge with method SQUASH for PR {pr.html_url}: {f}") # Store created PR for later comment @@ -931,7 +940,7 @@ def main(): help="List of commits to cherry-pick. Can be represented as full or short commit SHA, PR number or URL to commit or PR. Separated by space, comma or line end.", ) parser.add_argument( - "--target-branches", help="List of branchs to cherry-pick. Separated by space, comma or line end." + "--target-branches", help="List of branches to cherry-pick. Separated by space, comma or line end." ) parser.add_argument( "--merge-commits", From f1ffacfc41e40b6f163829dfa0f213f417f77e64 Mon Sep 17 00:00:00 2001 From: naspirato Date: Tue, 18 Nov 2025 15:53:02 +0100 Subject: [PATCH 07/24] Enhance cherry-pick script with improved conflict handling and logging - Added detailed diagnostics for `git merge-base` command to better handle ancestor checks and error reporting. - Updated conflict resolution logic to capture line numbers of conflicts, improving user feedback on conflicted files. - Enhanced PR body generation to include links to specific lines in files with conflicts, facilitating easier resolution. - Introduced a method to find the first line with conflict markers in files, improving conflict detection accuracy. --- .github/scripts/cherry_pick.py | 155 ++++++++++++++++++++++++++++----- 1 file changed, 135 insertions(+), 20 deletions(-) diff --git a/.github/scripts/cherry_pick.py b/.github/scripts/cherry_pick.py index 8989525a89a3..c8b5bd0d639f 100755 --- a/.github/scripts/cherry_pick.py +++ b/.github/scripts/cherry_pick.py @@ -268,16 +268,64 @@ def _is_commit_in_branch(self, commit_sha: str, branch_name: str) -> bool: self.git_run("fetch", "origin", branch_name) # Check via merge-base - # Note: --is-ancestor returns 0 if commit is ancestor, 1 if not, so non-zero is expected for "not in branch" + # Note: --is-ancestor returns: + # - 0 if commit is ancestor (exists in branch) + # - 1 if commit is NOT ancestor (normal case, not an error) + # - >1 for real errors (commit doesn't exist, git problems, etc.) result = subprocess.run( ['git', 'merge-base', '--is-ancestor', commit_sha, f'origin/{branch_name}'], capture_output=True, text=True, check=False # Don't raise on non-zero exit (expected for "not ancestor") ) - return result.returncode == 0 + + # Exit code 0: commit is ancestor (exists in branch) + if result.returncode == 0: + return True + + # Exit code 1: commit is NOT ancestor (normal case, not an error) + if result.returncode == 1: + return False + + # Exit code > 1: real error - verify if commit exists for better diagnostics + error_msg = result.stderr.strip() if result.stderr else "Unknown error" + + # Try to verify if commit exists + try: + verify_result = subprocess.run( + ['git', 'rev-parse', '--verify', commit_sha], + capture_output=True, + text=True, + check=False + ) + if verify_result.returncode != 0: + # Commit doesn't exist - this is a real problem + self.logger.error( + f"Commit {commit_sha[:7]} does not exist in repository. " + f"Original error: {error_msg}" + ) + # Still return False - cherry-pick will fail with clear error + else: + # Commit exists but merge-base failed - might be temporary issue + self.logger.warning( + f"git merge-base failed (exit {result.returncode}) but commit exists. " + f"Error: {error_msg}. This might be a temporary issue." + ) + except Exception: + # Verification failed, just log original error + pass + + self.logger.warning( + f"git merge-base failed with exit code {result.returncode} " + f"for commit {commit_sha[:7]} in branch {branch_name}: {error_msg}" + ) + return False + except Exception as e: - self.logger.warning(f"Failed to check if commit {commit_sha[:7]} exists in branch {branch_name}: {e}") + # Catch other unexpected errors (network issues, permissions, etc.) + self.logger.warning( + f"Unexpected error checking if commit {commit_sha[:7]} exists in branch {branch_name}: {e}" + ) return False def _get_linked_issues_graphql(self, pr_number: int) -> List[str]: @@ -447,7 +495,7 @@ def pr_title(self, target_branch) -> str: return title - def pr_body(self, with_wf: bool, has_conflicts: bool = False, target_branch: Optional[str] = None, dev_branch_name: Optional[str] = None, conflict_files: Optional[List[str]] = None) -> str: + def pr_body(self, with_wf: bool, has_conflicts: bool = False, target_branch: Optional[str] = None, dev_branch_name: Optional[str] = None, conflict_files: Optional[List[str]] = None, pr_number: Optional[int] = None) -> str: """Generate PR body with improved format that passes validation""" commits = '\n'.join(self.pr_body_list) @@ -511,11 +559,30 @@ def pr_body(self, with_wf: bool, has_conflicts: bool = False, target_branch: Opt conflicted_files_section = "" if conflict_files: conflicted_files_section = "\n\n**Files with conflicts:**\n" - for file_path in conflict_files: - # Create link to file in GitHub branch (will show conflict markers) - # Format: https://github.com/{repo}/blob/{branch}/{file_path} - file_link = f"https://github.com/{self.repo_name}/blob/{branch_for_instructions}/{file_path}" - conflicted_files_section += f"- [{file_path}]({file_link})\n" + for conflict_item in conflict_files: + # Handle both tuple (file_path, line) and string (file_path) for backward compatibility + if isinstance(conflict_item, tuple): + file_path, conflict_line = conflict_item + else: + file_path = conflict_item + conflict_line = None + + # Create link to file in PR diff (if PR is created) or branch (as fallback) + if pr_number: + # Link to file in PR diff - GitHub will show conflicts + # Format: https://github.com/{repo}/pull/{pr_number}/files#diff-{hash}L{line} + file_link = f"https://github.com/{self.repo_name}/pull/{pr_number}/files#diff-{self._get_file_diff_hash(file_path)}" + if conflict_line: + file_link += f"L{conflict_line}" + else: + # Fallback: link to file in branch (will be updated after PR creation) + file_link = f"https://github.com/{self.repo_name}/blob/{branch_for_instructions}/{file_path}" + if conflict_line: + file_link += f"#L{conflict_line}" + + # Add line number to display text if available + display_text = f"{file_path} (line {conflict_line})" if conflict_line else file_path + conflicted_files_section += f"- [{display_text}]({file_link})\n" description_section += f""" #### Conflicts Require Manual Resolution @@ -559,6 +626,17 @@ def pr_body(self, with_wf: bool, has_conflicts: bool = False, target_branch: Opt return pr_body + def _get_file_diff_hash(self, file_path: str) -> str: + """Generate a hash for GitHub PR diff link anchor""" + import hashlib + import base64 + # GitHub uses base64-encoded SHA1 of file path for diff anchors + # Format: base64(sha1(file_path)) + sha1_hash = hashlib.sha1(file_path.encode('utf-8')).digest() + base64_hash = base64.b64encode(sha1_hash).decode('utf-8') + # Remove padding and replace special characters + return base64_hash.rstrip('=').replace('+', '-').replace('/', '_') + def add_summary(self, msg): self.logger.info(msg) summary_path = os.getenv('GITHUB_STEP_SUMMARY') @@ -587,8 +665,8 @@ def git_run(self, *args): return output def _handle_cherry_pick_conflict(self, commit_sha: str): - """Handle cherry-pick conflict: commit the conflict and return list of conflicted files""" - conflict_files = [] + """Handle cherry-pick conflict: commit the conflict and return list of conflicted files with line numbers""" + conflict_files = [] # List of (file_path, first_conflict_line) tuples try: # Check if there are conflicts (files with conflicts or unmerged paths) result = subprocess.run( @@ -614,7 +692,9 @@ def _handle_cherry_pick_conflict(self, commit_sha: str): if len(parts) > 1: file_path = parts[1].strip() if file_path: - conflict_files.append(file_path) + # Find first conflict line in file + conflict_line = self._find_first_conflict_line(file_path) + conflict_files.append((file_path, conflict_line)) # If we didn't find unmerged paths, try to find files with conflict markers if not conflict_files: @@ -627,7 +707,12 @@ def _handle_cherry_pick_conflict(self, commit_sha: str): check=True ) if diff_result.stdout.strip(): - conflict_files = [f.strip() for f in diff_result.stdout.strip().split('\n') if f.strip()] + for f in diff_result.stdout.strip().split('\n'): + file_path = f.strip() + if file_path: + # Find first conflict line in file + conflict_line = self._find_first_conflict_line(file_path) + conflict_files.append((file_path, conflict_line)) except subprocess.CalledProcessError: pass @@ -636,12 +721,24 @@ def _handle_cherry_pick_conflict(self, commit_sha: str): self.git_run("commit", "-m", f"BACKPORT-CONFLICT: manual resolution required for commit {commit_sha[:7]}") self.logger.info(f"Conflict committed for commit {commit_sha[:7]}") if conflict_files: - self.logger.info(f"Conflicted files: {', '.join(conflict_files)}") + file_list = [f"{path} (line {line})" if line else path for path, line in conflict_files] + self.logger.info(f"Conflicted files: {', '.join(file_list)}") return True, conflict_files except Exception as e: self.logger.error(f"Error handling conflict for commit {commit_sha[:7]}: {e}") return False, [] + + def _find_first_conflict_line(self, file_path: str) -> Optional[int]: + """Find the first line number with conflict markers in a file""" + try: + with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: + for line_num, line in enumerate(f, start=1): + if line.strip().startswith('<<<<<<<'): + return line_num + except (FileNotFoundError, IOError, UnicodeDecodeError) as e: + self.logger.debug(f"Failed to read file {file_path} to find conflict line: {e}") + return None def _find_existing_backport_comment(self, pull): """Find existing backport comment in PR (created by previous workflow run)""" @@ -733,7 +830,7 @@ def create_pr_for_branch(self, target_branch): """Create PR for target branch with conflict handling""" dev_branch_name = f"cherry-pick-{target_branch}-{self.dtm}" has_conflicts = False - all_conflict_files = [] # Collect all conflicted files across all commits + all_conflict_files = [] # Collect all conflicted files: [(file_path, conflict_line), ...] # First fetch for up-to-date branch information self.git_run("fetch", "origin", target_branch) @@ -771,10 +868,18 @@ def create_pr_for_branch(self, target_branch): conflict_handled, conflict_files = self._handle_cherry_pick_conflict(commit_sha) if conflict_handled: has_conflicts = True - # Add conflicted files to the list (avoid duplicates) - for file_path in conflict_files: - if file_path not in all_conflict_files: - all_conflict_files.append(file_path) + # Add conflicted files to the list (avoid duplicates by file path) + for conflict_item in conflict_files: + # conflict_item is (file_path, conflict_line) tuple + file_path, conflict_line = conflict_item + # Check if file_path already exists + existing = next((f for f in all_conflict_files if isinstance(f, tuple) and f[0] == file_path), None) + if not existing: + all_conflict_files.append((file_path, conflict_line)) + elif conflict_line and existing[1] is None: + # Update with line number if we found one and didn't have it before + idx = all_conflict_files.index(existing) + all_conflict_files[idx] = (file_path, conflict_line) else: # Failed to handle conflict, abort cherry-pick self.git_run("cherry-pick", "--abort") @@ -799,14 +904,24 @@ def create_pr_for_branch(self, target_branch): # Create PR (draft if there are conflicts) try: + # Create PR with initial body (without PR number for diff links) pr = self.repo.create_pull( base=target_branch, head=dev_branch_name, title=self.pr_title(target_branch), - body=self.pr_body(True, has_conflicts, target_branch, dev_branch_name, all_conflict_files if all_conflict_files else None), + body=self.pr_body(True, has_conflicts, target_branch, dev_branch_name, all_conflict_files if all_conflict_files else None, pr_number=None), maintainer_can_modify=True, draft=has_conflicts ) + + # If there are conflicts, update PR body with proper diff links + if has_conflicts and all_conflict_files: + try: + updated_body = self.pr_body(True, has_conflicts, target_branch, dev_branch_name, all_conflict_files, pr_number=pr.number) + pr.edit(body=updated_body) + self.logger.info(f"Updated PR body with diff links for conflicts") + except GithubException as e: + self.logger.warning(f"Failed to update PR body with diff links: {e}") except GithubException as e: self.has_errors = True self.logger.error(f"PR_CREATION_ERROR: Failed to create PR for branch {target_branch}: {e}") From 54cf67f3d159b6c0048a79270b54a6c37b9fe003 Mon Sep 17 00:00:00 2001 From: naspirato Date: Tue, 18 Nov 2025 16:39:19 +0100 Subject: [PATCH 08/24] Refactor changelog extraction in cherry-pick script - Renamed method to extract only the content of the Changelog entry section from PR body. - Updated logic to collect and merge Changelog entry contents from multiple PRs, improving clarity and usability. - Enhanced fallback mechanisms for Changelog entry generation, ensuring robust handling of various PR scenarios. --- .github/scripts/cherry_pick.py | 92 ++++++++++++++++++++++------------ 1 file changed, 59 insertions(+), 33 deletions(-) diff --git a/.github/scripts/cherry_pick.py b/.github/scripts/cherry_pick.py index c8b5bd0d639f..be77f05c29a7 100755 --- a/.github/scripts/cherry_pick.py +++ b/.github/scripts/cherry_pick.py @@ -442,45 +442,68 @@ def _extract_changelog_entry(self, pr_body: str) -> Optional[str]: return entry - def _extract_description_for_reviewers(self, pr_body: str) -> Optional[str]: - """Extract Description for reviewers section from PR body""" + def _extract_changelog_entry_content(self, pr_body: str) -> Optional[str]: + """Extract only the content inside Changelog entry section (without header and content before it) + + Returns only the text content inside "### Changelog entry" section (up to "### Changelog category"). + """ if not pr_body: return None - desc_match = re.search(r"### Description for reviewers.*?\n(.*?)(\n###|$)", pr_body, re.DOTALL) - if not desc_match: + # Find Changelog entry section + entry_match = re.search(r"### Changelog entry.*?\n(.*?)(\n### Changelog category|$)", pr_body, re.DOTALL) + if not entry_match: return None - desc = desc_match.group(1).strip() - # Skip if it's just "..." or empty - if desc in ['...', '']: + # Get only content of Changelog entry section (without header) + entry_content = entry_match.group(1).strip() + + # Skip if entry is just "..." or empty + if entry_content in ['...', '']: return None - return desc + return entry_content def _get_changelog_info(self) -> Tuple[Optional[str], Optional[str], Optional[str]]: - """Get Changelog category, entry, and description from original PRs""" + """Get Changelog category, entry, and merged entry content from original PRs + + For multiple PRs: merges Changelog entry contents with '---' separator. + Returns: (category, entry_text, merged_entry_content) + """ category = None entry = None - description = None + entry_contents = [] # Collect all Changelog entry contents to merge - # Try to get from first PR (usually there's one main PR) + # Collect info from all PRs for pull in self.pull_requests: if pull.body: - cat = self._extract_changelog_category(pull.body) - if cat: - category = cat - ent = self._extract_changelog_entry(pull.body) - if ent: - entry = ent - desc = self._extract_description_for_reviewers(pull.body) - if desc: - description = desc - # If we found all, we're done - if category and entry and description: - break - - return category, entry, description + # Extract category (take first non-empty) + if not category: + cat = self._extract_changelog_category(pull.body) + if cat: + category = cat + + # Extract entry text (take first non-empty) - used for building new entry if merged content not available + if not entry: + ent = self._extract_changelog_entry(pull.body) + if ent: + entry = ent + + # Extract only Changelog entry content (without header) + entry_content = self._extract_changelog_entry_content(pull.body) + if entry_content: + entry_contents.append(entry_content) + + # Merge entry contents if multiple PRs have content + if len(entry_contents) > 1: + # For multiple PRs, combine with separator + merged_entry_content = "\n\n---\n\n".join(entry_contents) + elif len(entry_contents) == 1: + merged_entry_content = entry_contents[0] + else: + merged_entry_content = None + + return category, entry, merged_entry_content def pr_title(self, target_branch) -> str: """Generate PR title""" @@ -508,17 +531,24 @@ def pr_body(self, with_wf: bool, has_conflicts: bool = False, target_branch: Opt # Determine target branch for description branch_desc = target_branch if target_branch else ', '.join(self.target_branches) - # Get Changelog category, entry, and description from original PR - changelog_category, changelog_entry, original_description = self._get_changelog_info() + # Get Changelog category, entry, and merged entry content from original PRs + changelog_category, changelog_entry_text, merged_entry_content = self._get_changelog_info() - # Build changelog entry if not found - if not changelog_entry: + # Use merged entry content if available, otherwise build entry + if merged_entry_content: + # Use merged content from original PRs directly in Changelog entry + changelog_entry = merged_entry_content + elif not changelog_entry_text: + # Build changelog entry if not found if len(self.pull_requests) == 1: pr_num = self.pull_requests[0].number changelog_entry = f"Backport of PR #{pr_num} to `{branch_desc}`" else: pr_nums = ', '.join([f"#{p.number}" for p in self.pull_requests]) changelog_entry = f"Backport of PRs {pr_nums} to `{branch_desc}`" + else: + # Use extracted entry text as fallback + changelog_entry = changelog_entry_text # Ensure entry is at least 20 characters (validation requirement) if len(changelog_entry) < 20: @@ -546,10 +576,6 @@ def pr_body(self, with_wf: bool, has_conflicts: bool = False, target_branch: Opt description_section += f"- **Cherry-picked by:** @{self.workflow_triggerer}\n" description_section += f"- **Related issues:** {issue_refs}" - # If original PR had description, append it - if original_description: - description_section += f"\n\n#### Original Description\n{original_description}" - # Add conflicts section if needed if has_conflicts: # Use dev_branch_name if provided, otherwise use target_branch as fallback From b4b3ba2eb55b9cd14f849936305debcdb4921c1f Mon Sep 17 00:00:00 2001 From: naspirato Date: Tue, 18 Nov 2025 16:49:48 +0100 Subject: [PATCH 09/24] Enhance cherry-pick script with improved diff link generation - Updated the logic for generating links to files in PR diffs to use SHA256 hashes of file content, improving accuracy and reliability. - Introduced a fallback mechanism to link to the PR files page if the diff hash cannot be retrieved. - Refactored the method for computing file hashes to handle base64-encoded content, ensuring compatibility with GitHub's requirements for diff anchors. --- .github/scripts/cherry_pick.py | 53 +++++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/.github/scripts/cherry_pick.py b/.github/scripts/cherry_pick.py index be77f05c29a7..40b6160bb5ee 100755 --- a/.github/scripts/cherry_pick.py +++ b/.github/scripts/cherry_pick.py @@ -6,7 +6,10 @@ import subprocess import argparse import re +import hashlib +import base64 from typing import List, Optional, Tuple +from urllib.parse import quote from github import Github, GithubException, GithubObject, Commit import requests @@ -596,10 +599,21 @@ def pr_body(self, with_wf: bool, has_conflicts: bool = False, target_branch: Opt # Create link to file in PR diff (if PR is created) or branch (as fallback) if pr_number: # Link to file in PR diff - GitHub will show conflicts - # Format: https://github.com/{repo}/pull/{pr_number}/files#diff-{hash}L{line} - file_link = f"https://github.com/{self.repo_name}/pull/{pr_number}/files#diff-{self._get_file_diff_hash(file_path)}" - if conflict_line: - file_link += f"L{conflict_line}" + # Format: https://github.com/{repo}/pull/{pr_number}/files#diff-{hash}R{line} + # GitHub diff hash is SHA256 of file content in hex format (64 chars) + # We need to get it from PR files API + diff_hash = self._get_file_diff_hash(file_path, pr_number) + if diff_hash: + file_link = f"https://github.com/{self.repo_name}/pull/{pr_number}/files#diff-{diff_hash}" + if conflict_line: + file_link += f"R{conflict_line}" + else: + # Fallback: use simple link to PR files page (GitHub will find the file) + # GitHub can resolve files by name in PR view + encoded_path = quote(file_path, safe='') + file_link = f"https://github.com/{self.repo_name}/pull/{pr_number}/files#diff-{encoded_path}" + if conflict_line: + file_link += f"R{conflict_line}" else: # Fallback: link to file in branch (will be updated after PR creation) file_link = f"https://github.com/{self.repo_name}/blob/{branch_for_instructions}/{file_path}" @@ -652,16 +666,27 @@ def pr_body(self, with_wf: bool, has_conflicts: bool = False, target_branch: Opt return pr_body - def _get_file_diff_hash(self, file_path: str) -> str: - """Generate a hash for GitHub PR diff link anchor""" - import hashlib - import base64 - # GitHub uses base64-encoded SHA1 of file path for diff anchors - # Format: base64(sha1(file_path)) - sha1_hash = hashlib.sha1(file_path.encode('utf-8')).digest() - base64_hash = base64.b64encode(sha1_hash).decode('utf-8') - # Remove padding and replace special characters - return base64_hash.rstrip('=').replace('+', '-').replace('/', '_') + def _compute_file_hash(self, base64_content: str) -> str: + """Compute SHA256 hash from base64-encoded file content""" + decoded_content = base64.b64decode(base64_content).decode('utf-8', errors='ignore') + return hashlib.sha256(decoded_content.encode('utf-8')).hexdigest() + + def _get_file_diff_hash(self, file_path: str, pr_number: int) -> Optional[str]: + """Get diff hash for GitHub PR file link using GitHub API + + GitHub uses SHA256 hash of file content in hex format (64 chars) for diff anchors. + Format: #diff-{64-char-hex-sha256}R{line} + We fetch file content from the PR's head branch and compute SHA256 hash. + """ + try: + pr = self.repo.get_pull(pr_number) + file_content = self.repo.get_contents(file_path, ref=pr.head.sha) + if file_content and hasattr(file_content, 'content'): + return self._compute_file_hash(file_content.content) + except (GithubException, Exception) as e: + self.logger.debug(f"Failed to get diff hash for {file_path} from PR #{pr_number}: {e}") + + return None def add_summary(self, msg): self.logger.info(msg) From 381d617108543aa88f9ec4782bdd9d80bd2c06b9 Mon Sep 17 00:00:00 2001 From: naspirato Date: Tue, 18 Nov 2025 17:10:39 +0100 Subject: [PATCH 10/24] Enhance cherry-pick script with improved backport comment management - Updated logic to check for existing backport comments and determine if updates are necessary based on the presence of target branches and workflow URLs. - Improved handling of comment updates to append results or replace "in progress" lines, ensuring clarity in backport status. - Enhanced logging for comment updates to provide better feedback on operations related to backport comments. --- .github/scripts/cherry_pick.py | 108 ++++++++++++++++++++++++++++----- 1 file changed, 93 insertions(+), 15 deletions(-) diff --git a/.github/scripts/cherry_pick.py b/.github/scripts/cherry_pick.py index 40b6160bb5ee..8fd457815957 100755 --- a/.github/scripts/cherry_pick.py +++ b/.github/scripts/cherry_pick.py @@ -813,20 +813,30 @@ def _create_initial_backport_comment(self): return target_branches_str = ', '.join([f"`{b}`" for b in self.target_branches]) - initial_comment = f"Backport to {target_branches_str} in progress: [workflow run]({self.workflow_url})" + new_line = f"Backport to {target_branches_str} in progress: [workflow run]({self.workflow_url})" for pull in self.pull_requests: try: # Try to find existing comment existing_comment = self._find_existing_backport_comment(pull) if existing_comment: - # Update existing comment - existing_comment.edit(initial_comment) - self.backport_comments.append((pull, existing_comment)) - self.logger.info(f"Updated existing backport comment in original PR #{pull.number}") + # Check if this workflow's branches are already mentioned + existing_body = existing_comment.body + branches_already_mentioned = all(f"`{b}`" in existing_body for b in self.target_branches) + + if branches_already_mentioned and self.workflow_url in existing_body: + # This workflow run already mentioned, skip update + self.backport_comments.append((pull, existing_comment)) + self.logger.debug(f"Backport comment already contains info for branches {self.target_branches}, skipping update") + else: + # Append new line to existing comment + updated_comment = f"{existing_body}\n\n{new_line}" + existing_comment.edit(updated_comment) + self.backport_comments.append((pull, existing_comment)) + self.logger.info(f"Updated existing backport comment in original PR #{pull.number}") else: # Create new comment - comment = pull.create_issue_comment(initial_comment) + comment = pull.create_issue_comment(new_line) self.backport_comments.append((pull, comment)) self.logger.info(f"Created initial backport comment in original PR #{pull.number}") except GithubException as e: @@ -839,38 +849,106 @@ def _update_backport_comment(self): for pull, comment in self.backport_comments: try: + # Get existing comment body + existing_body = comment.body + + # Build new results section for this workflow run total_branches = len(self.created_backport_prs) + len(self.skipped_branches) if total_branches == 0: # No branches processed (should not happen) - updated_comment = f"Backport to {', '.join([f'`{b}`' for b in self.target_branches])} completed with no results" + new_results = f"Backport to {', '.join([f'`{b}`' for b in self.target_branches])} completed with no results" if self.workflow_url: - updated_comment += f" - [workflow run]({self.workflow_url})" + new_results += f" - [workflow run]({self.workflow_url})" elif total_branches == 1 and len(self.created_backport_prs) == 1: # Single branch with PR - simple comment target_branch, pr, has_conflicts, conflict_files = self.created_backport_prs[0] status = "draft PR" if has_conflicts else "PR" - updated_comment = f"Backported to `{target_branch}`: {status} {pr.html_url}" + new_results = f"Backported to `{target_branch}`: {status} {pr.html_url}" if has_conflicts: - updated_comment += " (contains conflicts requiring manual resolution)" + new_results += " (contains conflicts requiring manual resolution)" if self.workflow_url: - updated_comment += f" - [workflow run]({self.workflow_url})" + new_results += f" - [workflow run]({self.workflow_url})" else: # Multiple branches or mixed results - list all - updated_comment = "Backport results:\n" + new_results = "Backport results:\n" # List created PRs for target_branch, pr, has_conflicts, conflict_files in self.created_backport_prs: status = "draft PR" if has_conflicts else "PR" conflict_note = " (contains conflicts requiring manual resolution)" if has_conflicts else "" - updated_comment += f"- `{target_branch}`: {status} {pr.html_url}{conflict_note}\n" + new_results += f"- `{target_branch}`: {status} {pr.html_url}{conflict_note}\n" # List skipped branches for target_branch, reason in self.skipped_branches: - updated_comment += f"- `{target_branch}`: skipped ({reason})\n" + new_results += f"- `{target_branch}`: skipped ({reason})\n" if self.workflow_url: - updated_comment += f"\n[workflow run]({self.workflow_url})" + new_results += f"\n[workflow run]({self.workflow_url})" + + # Replace the "in progress" line for this workflow run, or append results + if self.workflow_url in existing_body: + # Find and replace the "in progress" line for this workflow + # Handle both single-line and multi-line results + lines = existing_body.split('\n') + updated_lines = [] + workflow_found = False + in_results_block = False # Track if we're inside a multi-line results block + + for line in lines: + if self.workflow_url in line: + if "in progress" in line: + # Replace "in progress" line with results + updated_lines.append(new_results) + workflow_found = True + # If results are multi-line, mark that we're in a results block + if new_results.count('\n') > 0: + in_results_block = True + elif in_results_block: + # We're inside a results block for this workflow + # Skip lines until we find an empty line or a line with different workflow URL + if line.strip() == '': + in_results_block = False + updated_lines.append(line) + elif self.workflow_url not in line: + # Different workflow or end of results block + in_results_block = False + updated_lines.append(line) + # else: skip this line (it's part of old results) + else: + # Workflow URL found but not "in progress" - might be old results + # Check if this line is part of results for this workflow + is_result_line = ( + any(f"`{b}`" in line for b in self.target_branches) or + "Backport results:" in line or + line.strip().startswith("- `") + ) + if is_result_line and self.workflow_url in line: + # This is a results line for this workflow, replace with new results + updated_lines.append(new_results) + workflow_found = True + if new_results.count('\n') > 0: + in_results_block = True + else: + updated_lines.append(line) + elif in_results_block: + # We're inside a results block, skip until empty line + if line.strip() == '': + in_results_block = False + updated_lines.append(line) + # else: skip this line + else: + updated_lines.append(line) + + if not workflow_found: + # Workflow line not found, append results + updated_lines.append("") + updated_lines.append(new_results) + + updated_comment = '\n'.join(updated_lines) + else: + # Workflow URL not in comment, append results + updated_comment = f"{existing_body}\n\n{new_results}" comment.edit(updated_comment) self.logger.info(f"Updated backport comment in original PR #{pull.number}") From 3e4a4fb2a73d3840954141dd927c7732e0de27f7 Mon Sep 17 00:00:00 2001 From: naspirato Date: Tue, 18 Nov 2025 18:05:22 +0100 Subject: [PATCH 11/24] Refactor cherry-pick script for improved PR validation and changelog extraction - Enhanced import handling for the PR template to include error logging for import failures. - Updated PR validation logic to utilize already loaded PR objects, reducing unnecessary API calls. - Refactored changelog entry extraction to support stopping at the changelog category, improving flexibility. - Improved file hash computation to handle both text and binary files correctly. - Enhanced conflict line detection and error handling for better user feedback during cherry-pick operations. --- .github/scripts/cherry_pick.py | 103 +++++++++++++++++++-------------- 1 file changed, 59 insertions(+), 44 deletions(-) diff --git a/.github/scripts/cherry_pick.py b/.github/scripts/cherry_pick.py index 8fd457815957..0b748908eea2 100755 --- a/.github/scripts/cherry_pick.py +++ b/.github/scripts/cherry_pick.py @@ -14,13 +14,18 @@ import requests # Import PR template and categories from validate_pr_description action -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'actions', 'validate_pr_description')) -from pr_template import ( - ISSUE_PATTERNS, - FOR_CHANGELOG_CATEGORIES, - NOT_FOR_CHANGELOG_CATEGORIES, - get_category_section_template -) +try: + pr_template_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'actions', 'validate_pr_description')) + sys.path.insert(0, pr_template_path) + from pr_template import ( + ISSUE_PATTERNS, + FOR_CHANGELOG_CATEGORIES, + NOT_FOR_CHANGELOG_CATEGORIES, + get_category_section_template + ) +except ImportError as e: + logging.error(f"Failed to import pr_template: {e}") + raise class CherryPickCreator: @@ -223,12 +228,14 @@ def __add_pull(self, p: int, single: bool): def _validate_prs(self): """Validate PR numbers""" - # Validate PRs from self.pull_requests (already loaded) + # Validate PRs from self.pull_requests (already loaded, no need to re-fetch) for pull in self.pull_requests: try: - fetched_pull = self.repo.get_pull(pull.number) - if not fetched_pull.merged and not self.allow_unmerged: - raise ValueError(f"PR #{fetched_pull.number} is not merged. Use --allow-unmerged to allow backporting unmerged PRs") + # Use already loaded PR object instead of re-fetching + if not hasattr(pull, "merged"): + raise ValueError(f"PR #{pull.number} object is missing 'merged' attribute, cannot validate") + if not pull.merged and not self.allow_unmerged: + raise ValueError(f"PR #{pull.number} is not merged. Use --allow-unmerged to allow backporting unmerged PRs") except GithubException as e: raise ValueError(f"PR #{pull.number} does not exist or is not accessible: {e}") @@ -429,12 +436,26 @@ def _extract_changelog_category(self, pr_body: str) -> Optional[str]: return None - def _extract_changelog_entry(self, pr_body: str) -> Optional[str]: - """Extract Changelog entry from PR body""" + def _extract_changelog_entry(self, pr_body: str, stop_at_category: bool = False) -> Optional[str]: + """Extract Changelog entry from PR body + + Args: + pr_body: PR body text + stop_at_category: If True, stop at "### Changelog category", otherwise stop at any "###" + + Returns: + Content of Changelog entry section, or None if not found + """ if not pr_body: return None - entry_match = re.search(r"### Changelog entry.*?\n(.*?)(\n###|$)", pr_body, re.DOTALL) + # Choose regex pattern based on stop_at_category + if stop_at_category: + pattern = r"### Changelog entry.*?\n(.*?)(\n### Changelog category|$)" + else: + pattern = r"### Changelog entry.*?\n(.*?)(\n###|$)" + + entry_match = re.search(pattern, pr_body, re.DOTALL) if not entry_match: return None @@ -450,22 +471,7 @@ def _extract_changelog_entry_content(self, pr_body: str) -> Optional[str]: Returns only the text content inside "### Changelog entry" section (up to "### Changelog category"). """ - if not pr_body: - return None - - # Find Changelog entry section - entry_match = re.search(r"### Changelog entry.*?\n(.*?)(\n### Changelog category|$)", pr_body, re.DOTALL) - if not entry_match: - return None - - # Get only content of Changelog entry section (without header) - entry_content = entry_match.group(1).strip() - - # Skip if entry is just "..." or empty - if entry_content in ['...', '']: - return None - - return entry_content + return self._extract_changelog_entry(pr_body, stop_at_category=True) def _get_changelog_info(self) -> Tuple[Optional[str], Optional[str], Optional[str]]: """Get Changelog category, entry, and merged entry content from original PRs @@ -608,19 +614,18 @@ def pr_body(self, with_wf: bool, has_conflicts: bool = False, target_branch: Opt if conflict_line: file_link += f"R{conflict_line}" else: - # Fallback: use simple link to PR files page (GitHub will find the file) - # GitHub can resolve files by name in PR view - encoded_path = quote(file_path, safe='') - file_link = f"https://github.com/{self.repo_name}/pull/{pr_number}/files#diff-{encoded_path}" - if conflict_line: - file_link += f"R{conflict_line}" + # Fallback: use simple link to PR files page (no anchor, since we can't get the hash) + # GitHub doesn't support encoded path in diff anchor, so we link to files tab without anchor + file_link = f"https://github.com/{self.repo_name}/pull/{pr_number}/files" + # Line number is shown in link text, not in URL else: # Fallback: link to file in branch (will be updated after PR creation) file_link = f"https://github.com/{self.repo_name}/blob/{branch_for_instructions}/{file_path}" if conflict_line: file_link += f"#L{conflict_line}" - # Add line number to display text if available + # Add line number to display text if available (always try to show it if we have it) + # If conflict_line is None, it means the whole file is in conflict, so don't add line number display_text = f"{file_path} (line {conflict_line})" if conflict_line else file_path conflicted_files_section += f"- [{display_text}]({file_link})\n" @@ -667,9 +672,12 @@ def pr_body(self, with_wf: bool, has_conflicts: bool = False, target_branch: Opt return pr_body def _compute_file_hash(self, base64_content: str) -> str: - """Compute SHA256 hash from base64-encoded file content""" - decoded_content = base64.b64decode(base64_content).decode('utf-8', errors='ignore') - return hashlib.sha256(decoded_content.encode('utf-8')).hexdigest() + """Compute SHA256 hash from base64-encoded file content + + Hashes bytes directly to support both text and binary files correctly. + """ + decoded_bytes = base64.b64decode(base64_content) + return hashlib.sha256(decoded_bytes).hexdigest() def _get_file_diff_hash(self, file_path: str, pr_number: int) -> Optional[str]: """Get diff hash for GitHub PR file link using GitHub API @@ -781,7 +789,12 @@ def _handle_cherry_pick_conflict(self, commit_sha: str): return False, [] def _find_first_conflict_line(self, file_path: str) -> Optional[int]: - """Find the first line number with conflict markers in a file""" + """Find the first line number with conflict markers in a file + + Returns: + Line number of first conflict marker (<<<<<<<) if found, None otherwise. + None means either the whole file is in conflict or we couldn't determine the line. + """ try: with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: for line_num, line in enumerate(f, start=1): @@ -800,7 +813,7 @@ def _find_existing_backport_comment(self, pull): # Check if comment is from YDBot and contains "Backport" keyword if comment.user.login == "YDBot" and "Backport" in comment.body: # Check if it's an initial comment (contains "in progress") or a result comment - if "in progress" in comment.body or "Backport" in comment.body: + if "in progress" in comment.body: return comment except Exception as e: self.logger.debug(f"Failed to find existing comment in PR #{pull.number}: {e}") @@ -1050,7 +1063,9 @@ def create_pr_for_branch(self, target_branch): pr.edit(body=updated_body) self.logger.info(f"Updated PR body with diff links for conflicts") except GithubException as e: - self.logger.warning(f"Failed to update PR body with diff links: {e}") + self.has_errors = True + self.logger.error(f"PR_BODY_UPDATE_ERROR: Failed to update PR body with diff links for branch {target_branch}: {e}") + raise except GithubException as e: self.has_errors = True self.logger.error(f"PR_CREATION_ERROR: Failed to create PR for branch {target_branch}: {e}") @@ -1154,7 +1169,7 @@ def process(self): else: reason = f"git command failed (exit code {e.returncode})" self.skipped_branches.append((target, reason)) - except BaseException as e: + except Exception as e: self.has_errors = True error_msg = f"UNEXPECTED_ERROR: Branch {target} - {type(e).__name__}: {e}" self.logger.error(error_msg) From 30583a3827c845ca544df8649bda9ff2bc7dfbc0 Mon Sep 17 00:00:00 2001 From: naspirato Date: Tue, 18 Nov 2025 18:15:53 +0100 Subject: [PATCH 12/24] Update cherry-pick script to change cherry-pick command options - Modified the cherry-pick command to use '--empty=drop' instead of '--allow-empty', improving handling of empty commits during cherry-picking operations. --- .github/scripts/cherry_pick.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/scripts/cherry_pick.py b/.github/scripts/cherry_pick.py index 0b748908eea2..7233084dc374 100755 --- a/.github/scripts/cherry_pick.py +++ b/.github/scripts/cherry_pick.py @@ -1001,7 +1001,7 @@ def create_pr_for_branch(self, target_branch): # Cherry-pick commits for commit_sha in commits_to_pick: try: - self.git_run("cherry-pick", "--allow-empty", commit_sha) + self.git_run("cherry-pick", "--empty=drop", commit_sha) except subprocess.CalledProcessError as e: error_output = e.output.decode() if e.output else "" # Check if this is a conflict or another error From b3c135c970f2903aa9d102457dd391c24c1d886d Mon Sep 17 00:00:00 2001 From: naspirato Date: Tue, 18 Nov 2025 18:26:58 +0100 Subject: [PATCH 13/24] Refactor cherry-pick command in script to simplify handling of empty commits - Removed the '--empty=drop' option from the cherry-pick command, as it is unnecessary due to Git's default behavior of skipping empty commits. This change streamlines the command and improves clarity in the script. --- .github/scripts/cherry_pick.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/scripts/cherry_pick.py b/.github/scripts/cherry_pick.py index 7233084dc374..00e67a4f6a02 100755 --- a/.github/scripts/cherry_pick.py +++ b/.github/scripts/cherry_pick.py @@ -999,9 +999,10 @@ def create_pr_for_branch(self, target_branch): return # Cherry-pick commits + # Note: git cherry-pick by default skips empty commits, so we don't need --empty=drop (which doesn't exist) for commit_sha in commits_to_pick: try: - self.git_run("cherry-pick", "--empty=drop", commit_sha) + self.git_run("cherry-pick", commit_sha) except subprocess.CalledProcessError as e: error_output = e.output.decode() if e.output else "" # Check if this is a conflict or another error From 8c7ac30299f2164408e04d0febb5d3d836a595bf Mon Sep 17 00:00:00 2001 From: naspirato Date: Tue, 18 Nov 2025 23:42:22 +0100 Subject: [PATCH 14/24] Refactor cherry-pick script to streamline commit existence checks and improve error handling - Removed the method for checking if a commit exists in the target branch, simplifying the cherry-pick process. - Updated the cherry-pick logic to handle cases where commits are already applied, improving user feedback and logging during cherry-pick operations. --- .github/scripts/cherry_pick.py | 95 ++++------------------------------ 1 file changed, 10 insertions(+), 85 deletions(-) diff --git a/.github/scripts/cherry_pick.py b/.github/scripts/cherry_pick.py index 00e67a4f6a02..606fe4a9bbee 100755 --- a/.github/scripts/cherry_pick.py +++ b/.github/scripts/cherry_pick.py @@ -271,73 +271,6 @@ def _validate_inputs(self): self._validate_branches() self.logger.info("Input validation successful") - def _is_commit_in_branch(self, commit_sha: str, branch_name: str) -> bool: - """Check if commit already exists in target branch""" - try: - # First fetch for up-to-date branch information - self.git_run("fetch", "origin", branch_name) - - # Check via merge-base - # Note: --is-ancestor returns: - # - 0 if commit is ancestor (exists in branch) - # - 1 if commit is NOT ancestor (normal case, not an error) - # - >1 for real errors (commit doesn't exist, git problems, etc.) - result = subprocess.run( - ['git', 'merge-base', '--is-ancestor', commit_sha, f'origin/{branch_name}'], - capture_output=True, - text=True, - check=False # Don't raise on non-zero exit (expected for "not ancestor") - ) - - # Exit code 0: commit is ancestor (exists in branch) - if result.returncode == 0: - return True - - # Exit code 1: commit is NOT ancestor (normal case, not an error) - if result.returncode == 1: - return False - - # Exit code > 1: real error - verify if commit exists for better diagnostics - error_msg = result.stderr.strip() if result.stderr else "Unknown error" - - # Try to verify if commit exists - try: - verify_result = subprocess.run( - ['git', 'rev-parse', '--verify', commit_sha], - capture_output=True, - text=True, - check=False - ) - if verify_result.returncode != 0: - # Commit doesn't exist - this is a real problem - self.logger.error( - f"Commit {commit_sha[:7]} does not exist in repository. " - f"Original error: {error_msg}" - ) - # Still return False - cherry-pick will fail with clear error - else: - # Commit exists but merge-base failed - might be temporary issue - self.logger.warning( - f"git merge-base failed (exit {result.returncode}) but commit exists. " - f"Error: {error_msg}. This might be a temporary issue." - ) - except Exception: - # Verification failed, just log original error - pass - - self.logger.warning( - f"git merge-base failed with exit code {result.returncode} " - f"for commit {commit_sha[:7]} in branch {branch_name}: {error_msg}" - ) - return False - - except Exception as e: - # Catch other unexpected errors (network issues, permissions, etc.) - self.logger.warning( - f"Unexpected error checking if commit {commit_sha[:7]} exists in branch {branch_name}: {e}" - ) - return False - def _get_linked_issues_graphql(self, pr_number: int) -> List[str]: """Get linked issues via GraphQL API""" query = """ @@ -982,31 +915,23 @@ def create_pr_for_branch(self, target_branch): self.git_run("checkout", "-B", target_branch, f"origin/{target_branch}") self.git_run("checkout", "-b", dev_branch_name) - # Filter commits that already exist in branch - commits_to_pick = [] - for commit_sha in self.commit_shas: - if self._is_commit_in_branch(commit_sha, target_branch): - self.logger.info(f"Commit {commit_sha[:7]} already exists in branch {target_branch}, skipping") - self.add_summary(f"Commit {commit_sha[:7]} already exists in branch {target_branch}, skipped") - else: - commits_to_pick.append(commit_sha) - - if not commits_to_pick: - reason = "all commits already exist in target branch" - self.logger.info(f"All commits already exist in branch {target_branch}, skipping PR creation") - self.add_summary(f"All commits already exist in branch {target_branch}, skipping PR creation") - self.skipped_branches.append((target_branch, reason)) - return - # Cherry-pick commits # Note: git cherry-pick by default skips empty commits, so we don't need --empty=drop (which doesn't exist) - for commit_sha in commits_to_pick: + for commit_sha in self.commit_shas: try: self.git_run("cherry-pick", commit_sha) except subprocess.CalledProcessError as e: error_output = e.output.decode() if e.output else "" + stderr_output = e.stderr.decode() if e.stderr else "" + full_output = error_output + stderr_output + + # Check if commit is already applied (git cherry-pick returns error but commit is already in branch) + if "empty" in full_output.lower() or "nothing to commit" in full_output.lower(): + self.logger.info(f"Commit {commit_sha[:7]} changes is already in branch {target_branch}, skipping") + continue + # Check if this is a conflict or another error - if "CONFLICT" in error_output or "conflict" in error_output.lower(): + if "conflict" in full_output.lower(): self.logger.warning(f"Conflict during cherry-pick of commit {commit_sha[:7]}") conflict_handled, conflict_files = self._handle_cherry_pick_conflict(commit_sha) if conflict_handled: From 6a8bb9fe97a322031913f66c7742a0f8b04ffac8 Mon Sep 17 00:00:00 2001 From: naspirato Date: Tue, 18 Nov 2025 23:59:07 +0100 Subject: [PATCH 15/24] Enhance cherry-pick script to improve conflict message handling and logging - Updated the conflict resolution logic to capture and display detailed conflict messages alongside file paths and line numbers. - Introduced a new method to extract conflict messages from git output, enhancing clarity during manual resolution. - Improved logging to provide detailed information about detected conflicts, aiding in troubleshooting and resolution efforts. --- .github/scripts/cherry_pick.py | 131 ++++++++++++++++++++++++++------- 1 file changed, 103 insertions(+), 28 deletions(-) diff --git a/.github/scripts/cherry_pick.py b/.github/scripts/cherry_pick.py index 606fe4a9bbee..eaae06aae9fb 100755 --- a/.github/scripts/cherry_pick.py +++ b/.github/scripts/cherry_pick.py @@ -523,17 +523,20 @@ def pr_body(self, with_wf: bool, has_conflicts: bool = False, target_branch: Opt # Use dev_branch_name if provided, otherwise use target_branch as fallback branch_for_instructions = dev_branch_name if dev_branch_name else (target_branch if target_branch else "") - # Build conflicted files list with links + # Build conflicted files list with links and conflict messages conflicted_files_section = "" if conflict_files: conflicted_files_section = "\n\n**Files with conflicts:**\n" for conflict_item in conflict_files: - # Handle both tuple (file_path, line) and string (file_path) for backward compatibility + # Handle both tuple (file_path, line, message) and string (file_path) for backward compatibility if isinstance(conflict_item, tuple): - file_path, conflict_line = conflict_item + file_path = conflict_item[0] + conflict_line = conflict_item[1] if len(conflict_item) > 1 else None + conflict_message = conflict_item[2] if len(conflict_item) > 2 else None else: file_path = conflict_item conflict_line = None + conflict_message = None # Create link to file in PR diff (if PR is created) or branch (as fallback) if pr_number: @@ -561,6 +564,10 @@ def pr_body(self, with_wf: bool, has_conflicts: bool = False, target_branch: Opt # If conflict_line is None, it means the whole file is in conflict, so don't add line number display_text = f"{file_path} (line {conflict_line})" if conflict_line else file_path conflicted_files_section += f"- [{display_text}]({file_link})\n" + + # Add conflict message if available + if conflict_message: + conflicted_files_section += f" - `{conflict_message}`\n" description_section += f""" #### Conflicts Require Manual Resolution @@ -656,9 +663,50 @@ def git_run(self, *args): self.logger.debug("Command output:\n%s", output) return output - def _handle_cherry_pick_conflict(self, commit_sha: str): - """Handle cherry-pick conflict: commit the conflict and return list of conflicted files with line numbers""" - conflict_files = [] # List of (file_path, first_conflict_line) tuples + def _extract_conflict_messages(self, git_output: str) -> List[str]: + """Extract CONFLICT messages from git cherry-pick output + + Git supports the following conflict types: + - CONFLICT (content): Both branches modified the same part of a file (status: UU) + - CONFLICT (modify/delete): File modified in one branch, deleted in another (status: DU or UD) + - CONFLICT (add/add): File added in both branches with different content (status: AA) + - CONFLICT (rename/delete): File renamed in one branch, deleted in another + - CONFLICT (rename/rename): File renamed differently in both branches + - CONFLICT (delete/modify): File deleted in one branch, modified in another (status: DU or UD) + + This function extracts all CONFLICT messages from git output, which includes the full description + of what happened (e.g., "CONFLICT (modify/delete): file.py deleted in HEAD and modified in..."). + + Returns: + List of conflict messages (e.g., "CONFLICT (modify/delete): file.py deleted in HEAD and modified in...") + """ + conflict_messages = [] + lines = git_output.split('\n') + + for i, line in enumerate(lines): + if 'CONFLICT' in line and '(' in line and ')' in line: + # Found a CONFLICT line, collect it and any continuation lines + conflict_msg = line.strip() + # Check if next line is a continuation (starts with "Version" or is part of the message) + j = i + 1 + while j < len(lines) and (lines[j].strip().startswith('Version') or + (lines[j].strip() and not lines[j].strip().startswith('hint:') and + not lines[j].strip().startswith('error:'))): + conflict_msg += ' ' + lines[j].strip() + j += 1 + conflict_messages.append(conflict_msg) + + return conflict_messages + + def _handle_cherry_pick_conflict(self, commit_sha: str, git_output: str = ""): + """Handle cherry-pick conflict: commit the conflict and return list of conflicted files with conflict messages + + Returns: + (success, conflict_files) where conflict_files is list of (file_path, conflict_line, conflict_message) tuples + """ + conflict_files = [] # List of (file_path, conflict_line, conflict_message) tuples + conflict_messages = self._extract_conflict_messages(git_output) + try: # Check if there are conflicts (files with conflicts or unmerged paths) result = subprocess.run( @@ -668,25 +716,37 @@ def _handle_cherry_pick_conflict(self, commit_sha: str): check=True ) - # Also check for conflict markers - has_conflict_markers = False if result.stdout.strip(): # Check for conflict markers in files status_lines = result.stdout.strip().split('\n') for line in status_lines: - if line.startswith('UU') or line.startswith('AA') or line.startswith('DD'): + # Git status codes for conflicts: + # UU: both modified (content conflict) + # AA: both added (add/add conflict) + # DD: both deleted (delete/delete - usually not a conflict, but git marks it) + # DU: deleted by us, updated by them (modify/delete or delete/modify) + # UD: updated by us, deleted by them (modify/delete or delete/modify) + # AU: added by us, updated by them (add/modify conflict) + # UA: updated by us, added by them (modify/add conflict) + if line.startswith('UU') or line.startswith('AA') or line.startswith('DD') or \ + line.startswith('DU') or line.startswith('UD') or line.startswith('AU') or line.startswith('UA'): # Unmerged paths - definitely a conflict - has_conflict_markers = True # Extract filename (after status code) - # Format: "XY filename" where XY is 2-char status, then space, then filename - # More robust: split by whitespace and take everything after status code parts = line.split(None, 1) # Split on whitespace, max 1 split if len(parts) > 1: file_path = parts[1].strip() if file_path: # Find first conflict line in file conflict_line = self._find_first_conflict_line(file_path) - conflict_files.append((file_path, conflict_line)) + + # Find matching conflict message from git output + conflict_message = None + for msg in conflict_messages: + if file_path in msg: + conflict_message = msg + break + + conflict_files.append((file_path, conflict_line, conflict_message)) # If we didn't find unmerged paths, try to find files with conflict markers if not conflict_files: @@ -704,17 +764,31 @@ def _handle_cherry_pick_conflict(self, commit_sha: str): if file_path: # Find first conflict line in file conflict_line = self._find_first_conflict_line(file_path) - conflict_files.append((file_path, conflict_line)) + + # Find matching conflict message + conflict_message = None + for msg in conflict_messages: + if file_path in msg: + conflict_message = msg + break + + conflict_files.append((file_path, conflict_line, conflict_message)) except subprocess.CalledProcessError: pass + # Log detailed conflict information + if conflict_files: + self.logger.info(f"Detected {len(conflict_files)} conflict(s) for commit {commit_sha[:7]}:") + for file_path, conflict_line, conflict_message in conflict_files: + if conflict_message: + self.logger.info(f" {conflict_message}") + else: + self.logger.info(f" - {file_path}" + (f" (line {conflict_line})" if conflict_line else "")) + # There are changes (conflicts or other modifications) self.git_run("add", "-A") self.git_run("commit", "-m", f"BACKPORT-CONFLICT: manual resolution required for commit {commit_sha[:7]}") self.logger.info(f"Conflict committed for commit {commit_sha[:7]}") - if conflict_files: - file_list = [f"{path} (line {line})" if line else path for path, line in conflict_files] - self.logger.info(f"Conflicted files: {', '.join(file_list)}") return True, conflict_files except Exception as e: self.logger.error(f"Error handling conflict for commit {commit_sha[:7]}: {e}") @@ -933,21 +1007,22 @@ def create_pr_for_branch(self, target_branch): # Check if this is a conflict or another error if "conflict" in full_output.lower(): self.logger.warning(f"Conflict during cherry-pick of commit {commit_sha[:7]}") - conflict_handled, conflict_files = self._handle_cherry_pick_conflict(commit_sha) + conflict_handled, conflict_files = self._handle_cherry_pick_conflict(commit_sha, full_output) if conflict_handled: has_conflicts = True # Add conflicted files to the list (avoid duplicates by file path) + # conflict_item is now (file_path, conflict_line, conflict_message) tuple for conflict_item in conflict_files: - # conflict_item is (file_path, conflict_line) tuple - file_path, conflict_line = conflict_item - # Check if file_path already exists - existing = next((f for f in all_conflict_files if isinstance(f, tuple) and f[0] == file_path), None) - if not existing: - all_conflict_files.append((file_path, conflict_line)) - elif conflict_line and existing[1] is None: - # Update with line number if we found one and didn't have it before - idx = all_conflict_files.index(existing) - all_conflict_files[idx] = (file_path, conflict_line) + if len(conflict_item) >= 2: + file_path = conflict_item[0] + # Check if file_path already exists + existing = next((f for f in all_conflict_files if isinstance(f, tuple) and len(f) > 0 and f[0] == file_path), None) + if not existing: + all_conflict_files.append(conflict_item) + elif len(conflict_item) >= 3 and conflict_item[2] and (len(existing) < 3 or not existing[2]): + # Update with conflict message if we found one and didn't have it before + idx = all_conflict_files.index(existing) + all_conflict_files[idx] = conflict_item else: # Failed to handle conflict, abort cherry-pick self.git_run("cherry-pick", "--abort") From 4e03d14ea14dfe7284c71fdda1c771e5bc3913f6 Mon Sep 17 00:00:00 2001 From: naspirato Date: Wed, 19 Nov 2025 00:09:41 +0100 Subject: [PATCH 16/24] Enhance cherry-pick script to improve conflict summary generation - Updated the conflict handling logic to create a detailed summary of files with conflicts, including file paths, line numbers, and associated messages. - Improved the formatting of PR summaries for better readability, ensuring clarity when conflicts arise during cherry-picking operations. --- .github/scripts/cherry_pick.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/.github/scripts/cherry_pick.py b/.github/scripts/cherry_pick.py index eaae06aae9fb..bc2837163328 100755 --- a/.github/scripts/cherry_pick.py +++ b/.github/scripts/cherry_pick.py @@ -1082,10 +1082,32 @@ def create_pr_for_branch(self, target_branch): if has_conflicts: self.logger.info(f"Created draft PR {pr.html_url} for branch {target_branch} with conflicts") - self.add_summary(f"Branch {target_branch}: Draft PR {pr.html_url} created with conflicts") + summary_msg = f"### Branch `{target_branch}`: Draft PR {pr.html_url} created with conflicts\n\n" + if all_conflict_files: + summary_msg += "**Files with conflicts:**\n\n" + for conflict_item in all_conflict_files: + # Handle both tuple (file_path, line, message) and string (file_path) for backward compatibility + if isinstance(conflict_item, tuple): + file_path = conflict_item[0] + conflict_line = conflict_item[1] if len(conflict_item) > 1 else None + conflict_message = conflict_item[2] if len(conflict_item) > 2 else None + else: + file_path = conflict_item + conflict_line = None + conflict_message = None + + summary_msg += f"- `{file_path}`" + if conflict_line: + summary_msg += f" (line {conflict_line})" + summary_msg += "\n" + + if conflict_message: + summary_msg += f" ```\n {conflict_message}\n ```\n" + summary_msg += "\n" + self.add_summary(summary_msg) else: self.logger.info(f"Created PR {pr.html_url} for branch {target_branch}") - self.add_summary(f"Branch {target_branch}: PR {pr.html_url} created") + self.add_summary(f"### Branch `{target_branch}`: PR {pr.html_url} created\n") # Enable automerge only if there are no conflicts if not has_conflicts: From b19bb248ff706d36292b1af03257907a2758552c Mon Sep 17 00:00:00 2001 From: naspirato Date: Wed, 19 Nov 2025 11:02:29 +0100 Subject: [PATCH 17/24] Refine conflict message extraction in cherry-pick script - Enhanced the logic for capturing conflict messages to ensure accurate matching of file paths, including handling both relative and absolute paths. - Improved the detection of continuation lines in conflict messages, allowing for better summarization of conflicts. - Added a cleanup step to quit the cherry-pick operation if no commits are left to apply, improving script robustness. --- .github/scripts/cherry_pick.py | 67 +++++++++++++++++++++++++++++----- 1 file changed, 57 insertions(+), 10 deletions(-) diff --git a/.github/scripts/cherry_pick.py b/.github/scripts/cherry_pick.py index bc2837163328..a9368e940faf 100755 --- a/.github/scripts/cherry_pick.py +++ b/.github/scripts/cherry_pick.py @@ -688,11 +688,19 @@ def _extract_conflict_messages(self, git_output: str) -> List[str]: # Found a CONFLICT line, collect it and any continuation lines conflict_msg = line.strip() # Check if next line is a continuation (starts with "Version" or is part of the message) + # Stop if we encounter another CONFLICT, hint:, error:, or empty line j = i + 1 - while j < len(lines) and (lines[j].strip().startswith('Version') or - (lines[j].strip() and not lines[j].strip().startswith('hint:') and - not lines[j].strip().startswith('error:'))): - conflict_msg += ' ' + lines[j].strip() + while j < len(lines): + next_line = lines[j].strip() + # Stop if we encounter another CONFLICT message + if 'CONFLICT' in next_line and '(' in next_line and ')' in next_line: + break + # Stop on hint:, error:, or empty line + if next_line.startswith('hint:') or next_line.startswith('error:') or not next_line: + break + # Continue if it's a continuation (starts with "Version" or is part of the message) + if next_line.startswith('Version') or next_line: + conflict_msg += ' ' + next_line j += 1 conflict_messages.append(conflict_msg) @@ -740,11 +748,28 @@ def _handle_cherry_pick_conflict(self, commit_sha: str, git_output: str = ""): conflict_line = self._find_first_conflict_line(file_path) # Find matching conflict message from git output + # Git outputs messages like "CONFLICT (type): filename ..." + # We need to match the message that specifically mentions this file conflict_message = None + file_basename = os.path.basename(file_path) for msg in conflict_messages: - if file_path in msg: - conflict_message = msg - break + # Git format: "CONFLICT (type): filename ..." + # Extract filename from message (first word after ": ") + if ': ' in msg: + msg_file_part = msg.split(': ', 1)[1] + # Extract filename (first word after ": ") + msg_filename = msg_file_part.split()[0] if msg_file_part.split() else "" + # Match if filename matches (handle both relative and absolute paths) + if msg_filename == file_path or msg_filename == file_basename: + conflict_message = msg + break + # Fallback: check if file_path appears right after "CONFLICT (type): " + elif file_path in msg: + # Make sure it's not a substring match (e.g., "file.py" in "file.pyc") + # Check if file_path appears as a word boundary match + if re.search(rf'\b{re.escape(file_path)}\b', msg) or re.search(rf'\b{re.escape(file_basename)}\b', msg): + conflict_message = msg + break conflict_files.append((file_path, conflict_line, conflict_message)) @@ -766,11 +791,27 @@ def _handle_cherry_pick_conflict(self, commit_sha: str, git_output: str = ""): conflict_line = self._find_first_conflict_line(file_path) # Find matching conflict message + # Git outputs messages like "CONFLICT (type): filename ..." conflict_message = None + file_basename = os.path.basename(file_path) for msg in conflict_messages: - if file_path in msg: - conflict_message = msg - break + # Git format: "CONFLICT (type): filename ..." + # Extract filename from message (first word after ": ") + if ': ' in msg: + msg_file_part = msg.split(': ', 1)[1] + # Extract filename (first word after ": ") + msg_filename = msg_file_part.split()[0] if msg_file_part.split() else "" + # Match if filename matches (handle both relative and absolute paths) + if msg_filename == file_path or msg_filename == file_basename: + conflict_message = msg + break + # Fallback: check if file_path appears right after "CONFLICT (type): " + elif file_path in msg: + # Make sure it's not a substring match (e.g., "file.py" in "file.pyc") + # Check if file_path appears as a word boundary match + if re.search(rf'\b{re.escape(file_path)}\b', msg) or re.search(rf'\b{re.escape(file_basename)}\b', msg): + conflict_message = msg + break conflict_files.append((file_path, conflict_line, conflict_message)) except subprocess.CalledProcessError: @@ -1002,6 +1043,12 @@ def create_pr_for_branch(self, target_branch): # Check if commit is already applied (git cherry-pick returns error but commit is already in branch) if "empty" in full_output.lower() or "nothing to commit" in full_output.lower(): self.logger.info(f"Commit {commit_sha[:7]} changes is already in branch {target_branch}, skipping") + # Quit cherry-pick operation to clean up state before next commit + try: + self.git_run("cherry-pick", "--quit") + except subprocess.CalledProcessError: + # If --quit fails, we're probably not in cherry-pick state, continue anyway + pass continue # Check if this is a conflict or another error From 28d2b67299d0376030628613944fe4d0df7d168b Mon Sep 17 00:00:00 2001 From: naspirato Date: Wed, 19 Nov 2025 12:02:02 +0100 Subject: [PATCH 18/24] Refine changelog category extraction in cherry-pick script - Updated the logic to collect and determine the changelog category from multiple PRs, ensuring it is only set if all categories are the same. - Improved handling of category extraction to allow for better flexibility when dealing with differing categories across PRs. - Enhanced comments for clarity on the purpose of category determination in the changelog process. --- .github/scripts/cherry_pick.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/.github/scripts/cherry_pick.py b/.github/scripts/cherry_pick.py index a9368e940faf..8bccf286d887 100755 --- a/.github/scripts/cherry_pick.py +++ b/.github/scripts/cherry_pick.py @@ -410,20 +410,20 @@ def _get_changelog_info(self) -> Tuple[Optional[str], Optional[str], Optional[st """Get Changelog category, entry, and merged entry content from original PRs For multiple PRs: merges Changelog entry contents with '---' separator. + If categories differ between PRs, returns None for category (to use template). Returns: (category, entry_text, merged_entry_content) """ - category = None + categories = [] # Collect all categories from all PRs entry = None entry_contents = [] # Collect all Changelog entry contents to merge # Collect info from all PRs for pull in self.pull_requests: if pull.body: - # Extract category (take first non-empty) - if not category: - cat = self._extract_changelog_category(pull.body) - if cat: - category = cat + # Extract category from each PR + cat = self._extract_changelog_category(pull.body) + if cat: + categories.append(cat) # Extract entry text (take first non-empty) - used for building new entry if merged content not available if not entry: @@ -436,6 +436,16 @@ def _get_changelog_info(self) -> Tuple[Optional[str], Optional[str], Optional[st if entry_content: entry_contents.append(entry_content) + # Determine category: use it only if all PRs have the same category + category = None + if categories: + # Check if all categories are the same + unique_categories = set(categories) + if len(unique_categories) == 1: + # All PRs have the same category, use it + category = categories[0] + # If categories differ, category remains None (will use template) + # Merge entry contents if multiple PRs have content if len(entry_contents) > 1: # For multiple PRs, combine with separator From 75976a07030eb50d01a853990f92c307f0e873db Mon Sep 17 00:00:00 2001 From: naspirato Date: Wed, 19 Nov 2025 12:09:49 +0100 Subject: [PATCH 19/24] Add missing conflict markers for special conflict types in cherry-pick script - Introduced a new method to add Git conflict markers for conflict types (DU, UD, AA) where they are not generated automatically. - Enhanced the conflict resolution process by ensuring visibility of conflicts in PR diffs, improving user experience during cherry-picking. - Updated logging to provide feedback when conflict markers are added, aiding in troubleshooting and resolution efforts. --- .github/scripts/cherry_pick.py | 111 +++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) diff --git a/.github/scripts/cherry_pick.py b/.github/scripts/cherry_pick.py index 8bccf286d887..dd5b9fc666ca 100755 --- a/.github/scripts/cherry_pick.py +++ b/.github/scripts/cherry_pick.py @@ -716,6 +716,112 @@ def _extract_conflict_messages(self, git_output: str) -> List[str]: return conflict_messages + def _add_missing_conflict_markers(self, status_lines: List[str], commit_sha: str) -> None: + """Add conflict markers for conflict types where Git doesn't generate them automatically + + This function handles conflicts where Git leaves files without conflict markers. + It adds standard Git conflict markers (<<<<<<<, =======, >>>>>>>) to make conflicts visible in PR diffs. + + Handled conflict types (where Git doesn't generate markers): + - DU (deleted by us, updated by them): File deleted in HEAD, modified in cherry-pick + - UD (updated by us, deleted by them): File modified in HEAD, deleted in cherry-pick + - AA (both added): File added in both branches with different content + + To disable this feature, simply comment out or remove the call to this function + in _handle_cherry_pick_conflict. + + Args: + status_lines: List of git status lines (from 'git status --porcelain') + commit_sha: SHA of the commit being cherry-picked + """ + for line in status_lines: + # Extract status code and file path + if len(line) < 3: + continue + + status_code = line[:2] + parts = line.split(None, 1) + if len(parts) < 2: + continue + + file_path = parts[1].strip() + if not file_path: + continue + + # Only process conflict types where Git doesn't generate markers by default + # DU, UD, AA: Git doesn't generate markers + # UU, AU, UA: Git generates markers, so we skip them + if status_code not in ('DU', 'UD', 'AA'): + continue + + # Check if file already has conflict markers + if self._find_first_conflict_line(file_path) is not None: + continue # Markers already present, skip + + try: + # Get content from HEAD (if file exists there) + head_content = None + try: + result = subprocess.run( + ['git', 'show', f'HEAD:{file_path}'], + capture_output=True, + text=True, + check=True + ) + head_content = result.stdout + except subprocess.CalledProcessError: + # File doesn't exist in HEAD + head_content = None + + # Get content from cherry-pick commit (if file exists there) + cherry_pick_content = None + try: + result = subprocess.run( + ['git', 'show', f'{commit_sha}:{file_path}'], + capture_output=True, + text=True, + check=True + ) + cherry_pick_content = result.stdout + except subprocess.CalledProcessError: + # File doesn't exist in cherry-pick commit + cherry_pick_content = None + + # Build conflict markers based on status code + conflict_markers_content = None + + if status_code == 'DU': + # Deleted by us (HEAD), updated by them (cherry-pick) + # File exists in cherry-pick but not in HEAD + if cherry_pick_content is not None: + conflict_markers_content = f"<<<<<<< HEAD\n=======\n{cherry_pick_content}>>>>>>> {commit_sha[:7]}\n" + + elif status_code == 'UD': + # Updated by us (HEAD), deleted by them (cherry-pick) + # File exists in HEAD but not in cherry-pick + if head_content is not None: + conflict_markers_content = f"<<<<<<< HEAD\n{head_content}=======\n>>>>>>> {commit_sha[:7]}\n" + + elif status_code == 'AA': + # Both added - file added in both branches with different content + if head_content is not None and cherry_pick_content is not None: + conflict_markers_content = f"<<<<<<< HEAD\n{head_content}=======\n{cherry_pick_content}>>>>>>> {commit_sha[:7]}\n" + + # Write conflict markers to file if we have content + if conflict_markers_content is not None: + # Ensure directory exists + file_dir = os.path.dirname(file_path) + if file_dir: + os.makedirs(file_dir, exist_ok=True) + + with open(file_path, 'w', encoding='utf-8') as f: + f.write(conflict_markers_content) + + self.logger.info(f"Added conflict markers to {file_path} for {status_code} conflict") + + except Exception as e: + self.logger.warning(f"Failed to add conflict markers to {file_path}: {e}") + def _handle_cherry_pick_conflict(self, commit_sha: str, git_output: str = ""): """Handle cherry-pick conflict: commit the conflict and return list of conflicted files with conflict messages @@ -737,6 +843,11 @@ def _handle_cherry_pick_conflict(self, commit_sha: str, git_output: str = ""): if result.stdout.strip(): # Check for conflict markers in files status_lines = result.stdout.strip().split('\n') + + # Add conflict markers for special conflict types (DU, UD, AA) where Git doesn't generate them + # To disable this feature, comment out the next line: + self._add_missing_conflict_markers(status_lines, commit_sha) + for line in status_lines: # Git status codes for conflicts: # UU: both modified (content conflict) From 1ca3115bbd887b831e0d8ff14163bc980a6fe3f1 Mon Sep 17 00:00:00 2001 From: naspirato Date: Wed, 19 Nov 2025 12:28:08 +0100 Subject: [PATCH 20/24] Enhance logging in cherry-pick script for conflict handling - Added debug and warning logs to provide better visibility into the conflict resolution process, including details on the number of status lines and specific conflict types (DU, UD, AA). - Improved error handling by including exception information in warning logs when adding conflict markers fails. - Updated logging to ensure clarity on the state of conflict markers and the conditions under which they are added or skipped. --- .github/scripts/cherry_pick.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/.github/scripts/cherry_pick.py b/.github/scripts/cherry_pick.py index dd5b9fc666ca..d714391c830d 100755 --- a/.github/scripts/cherry_pick.py +++ b/.github/scripts/cherry_pick.py @@ -734,6 +734,7 @@ def _add_missing_conflict_markers(self, status_lines: List[str], commit_sha: str status_lines: List of git status lines (from 'git status --porcelain') commit_sha: SHA of the commit being cherry-picked """ + self.logger.debug(f"_add_missing_conflict_markers called with {len(status_lines)} status lines for commit {commit_sha[:7]}") for line in status_lines: # Extract status code and file path if len(line) < 3: @@ -754,8 +755,11 @@ def _add_missing_conflict_markers(self, status_lines: List[str], commit_sha: str if status_code not in ('DU', 'UD', 'AA'): continue + self.logger.debug(f"Processing {status_code} conflict for {file_path}") + # Check if file already has conflict markers if self._find_first_conflict_line(file_path) is not None: + self.logger.debug(f"File {file_path} already has conflict markers, skipping") continue # Markers already present, skip try: @@ -793,19 +797,28 @@ def _add_missing_conflict_markers(self, status_lines: List[str], commit_sha: str if status_code == 'DU': # Deleted by us (HEAD), updated by them (cherry-pick) # File exists in cherry-pick but not in HEAD + self.logger.debug(f"DU conflict: head_content={head_content is not None}, cherry_pick_content={cherry_pick_content is not None}") if cherry_pick_content is not None: conflict_markers_content = f"<<<<<<< HEAD\n=======\n{cherry_pick_content}>>>>>>> {commit_sha[:7]}\n" + else: + self.logger.warning(f"DU conflict for {file_path}: cherry_pick_content is None, cannot add markers") elif status_code == 'UD': # Updated by us (HEAD), deleted by them (cherry-pick) # File exists in HEAD but not in cherry-pick + self.logger.debug(f"UD conflict: head_content={head_content is not None}, cherry_pick_content={cherry_pick_content is not None}") if head_content is not None: conflict_markers_content = f"<<<<<<< HEAD\n{head_content}=======\n>>>>>>> {commit_sha[:7]}\n" + else: + self.logger.warning(f"UD conflict for {file_path}: head_content is None, cannot add markers") elif status_code == 'AA': # Both added - file added in both branches with different content + self.logger.debug(f"AA conflict: head_content={head_content is not None}, cherry_pick_content={cherry_pick_content is not None}") if head_content is not None and cherry_pick_content is not None: conflict_markers_content = f"<<<<<<< HEAD\n{head_content}=======\n{cherry_pick_content}>>>>>>> {commit_sha[:7]}\n" + else: + self.logger.warning(f"AA conflict for {file_path}: head_content={head_content is not None}, cherry_pick_content={cherry_pick_content is not None}, cannot add markers") # Write conflict markers to file if we have content if conflict_markers_content is not None: @@ -814,13 +827,17 @@ def _add_missing_conflict_markers(self, status_lines: List[str], commit_sha: str if file_dir: os.makedirs(file_dir, exist_ok=True) + # For DU conflicts, file may already exist in working directory (Git left it there) + # We need to overwrite it with conflict markers with open(file_path, 'w', encoding='utf-8') as f: f.write(conflict_markers_content) self.logger.info(f"Added conflict markers to {file_path} for {status_code} conflict") + else: + self.logger.warning(f"Could not generate conflict markers for {file_path} (status_code={status_code})") except Exception as e: - self.logger.warning(f"Failed to add conflict markers to {file_path}: {e}") + self.logger.warning(f"Failed to add conflict markers to {file_path}: {e}", exc_info=True) def _handle_cherry_pick_conflict(self, commit_sha: str, git_output: str = ""): """Handle cherry-pick conflict: commit the conflict and return list of conflicted files with conflict messages @@ -843,6 +860,7 @@ def _handle_cherry_pick_conflict(self, commit_sha: str, git_output: str = ""): if result.stdout.strip(): # Check for conflict markers in files status_lines = result.stdout.strip().split('\n') + self.logger.debug(f"Git status output: {result.stdout.strip()}") # Add conflict markers for special conflict types (DU, UD, AA) where Git doesn't generate them # To disable this feature, comment out the next line: From 6785b4808fe144d85fdc3e1f9ab56f34769ccdda Mon Sep 17 00:00:00 2001 From: naspirato Date: Wed, 19 Nov 2025 15:16:28 +0100 Subject: [PATCH 21/24] Improve conflict handling in cherry-pick script - Added logic to skip files that already have conflict markers to prevent overwriting existing information during cherry-picking. - Introduced a mechanism to track processed files, ensuring that multiple conflicts in the same file are handled sequentially without duplication. - Enhanced logging to provide clearer insights into the conflict resolution process, including reasons for skipping files and details on existing conflict markers. --- .github/scripts/cherry_pick.py | 36 ++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/.github/scripts/cherry_pick.py b/.github/scripts/cherry_pick.py index d714391c830d..77b847e4da52 100755 --- a/.github/scripts/cherry_pick.py +++ b/.github/scripts/cherry_pick.py @@ -727,6 +727,12 @@ def _add_missing_conflict_markers(self, status_lines: List[str], commit_sha: str - UD (updated by us, deleted by them): File modified in HEAD, deleted in cherry-pick - AA (both added): File added in both branches with different content + Note on multiple conflicts in the same file: + - If a file already has conflict markers (from a previous commit or Git-generated), + this function will skip it to avoid overwriting existing conflict information. + - When cherry-picking multiple commits, conflicts should be resolved sequentially: + resolve the first conflict, then continue with the next commit. + To disable this feature, simply comment out or remove the call to this function in _handle_cherry_pick_conflict. @@ -735,6 +741,7 @@ def _add_missing_conflict_markers(self, status_lines: List[str], commit_sha: str commit_sha: SHA of the commit being cherry-picked """ self.logger.debug(f"_add_missing_conflict_markers called with {len(status_lines)} status lines for commit {commit_sha[:7]}") + processed_files = set() # Track files we've already processed to avoid duplicates for line in status_lines: # Extract status code and file path if len(line) < 3: @@ -755,11 +762,24 @@ def _add_missing_conflict_markers(self, status_lines: List[str], commit_sha: str if status_code not in ('DU', 'UD', 'AA'): continue + # Skip if we've already processed this file in this call + if file_path in processed_files: + self.logger.debug(f"File {file_path} already processed in this call, skipping") + continue + self.logger.debug(f"Processing {status_code} conflict for {file_path}") # Check if file already has conflict markers - if self._find_first_conflict_line(file_path) is not None: - self.logger.debug(f"File {file_path} already has conflict markers, skipping") + existing_marker_line = self._find_first_conflict_line(file_path) + if existing_marker_line is not None: + # File already has conflict markers - this can happen if: + # 1. Git already generated markers for this conflict (UU, AU, UA) + # 2. We already added markers in a previous call + # 3. There are multiple conflicts in the same file from different commits + self.logger.debug(f"File {file_path} already has conflict markers at line {existing_marker_line}, skipping. " + f"This may indicate: (1) Git generated markers automatically, " + f"(2) Markers were added previously, or (3) Multiple conflicts in same file.") + processed_files.add(file_path) # Mark as processed even if we skip continue # Markers already present, skip try: @@ -799,6 +819,7 @@ def _add_missing_conflict_markers(self, status_lines: List[str], commit_sha: str # File exists in cherry-pick but not in HEAD self.logger.debug(f"DU conflict: head_content={head_content is not None}, cherry_pick_content={cherry_pick_content is not None}") if cherry_pick_content is not None: + # Format: <<<<<<< HEAD (empty) ======= (cherry-pick content) >>>>>>> conflict_markers_content = f"<<<<<<< HEAD\n=======\n{cherry_pick_content}>>>>>>> {commit_sha[:7]}\n" else: self.logger.warning(f"DU conflict for {file_path}: cherry_pick_content is None, cannot add markers") @@ -808,6 +829,10 @@ def _add_missing_conflict_markers(self, status_lines: List[str], commit_sha: str # File exists in HEAD but not in cherry-pick self.logger.debug(f"UD conflict: head_content={head_content is not None}, cherry_pick_content={cherry_pick_content is not None}") if head_content is not None: + # Format: <<<<<<< HEAD (head content) ======= (empty) >>>>>>> + # Ensure head_content ends with newline if it doesn't + if head_content and not head_content.endswith('\n'): + head_content = head_content + '\n' conflict_markers_content = f"<<<<<<< HEAD\n{head_content}=======\n>>>>>>> {commit_sha[:7]}\n" else: self.logger.warning(f"UD conflict for {file_path}: head_content is None, cannot add markers") @@ -816,6 +841,10 @@ def _add_missing_conflict_markers(self, status_lines: List[str], commit_sha: str # Both added - file added in both branches with different content self.logger.debug(f"AA conflict: head_content={head_content is not None}, cherry_pick_content={cherry_pick_content is not None}") if head_content is not None and cherry_pick_content is not None: + # Format: <<<<<<< HEAD (head content) ======= (cherry-pick content) >>>>>>> + # Ensure head_content ends with newline if it doesn't + if head_content and not head_content.endswith('\n'): + head_content = head_content + '\n' conflict_markers_content = f"<<<<<<< HEAD\n{head_content}=======\n{cherry_pick_content}>>>>>>> {commit_sha[:7]}\n" else: self.logger.warning(f"AA conflict for {file_path}: head_content={head_content is not None}, cherry_pick_content={cherry_pick_content is not None}, cannot add markers") @@ -833,11 +862,14 @@ def _add_missing_conflict_markers(self, status_lines: List[str], commit_sha: str f.write(conflict_markers_content) self.logger.info(f"Added conflict markers to {file_path} for {status_code} conflict") + processed_files.add(file_path) # Mark as processed else: self.logger.warning(f"Could not generate conflict markers for {file_path} (status_code={status_code})") + processed_files.add(file_path) # Mark as processed even if we couldn't generate markers except Exception as e: self.logger.warning(f"Failed to add conflict markers to {file_path}: {e}", exc_info=True) + processed_files.add(file_path) # Mark as processed even if we failed def _handle_cherry_pick_conflict(self, commit_sha: str, git_output: str = ""): """Handle cherry-pick conflict: commit the conflict and return list of conflicted files with conflict messages From 3e35848171d94cfe7f0aea14ae9a683609c6dde8 Mon Sep 17 00:00:00 2001 From: naspirato Date: Wed, 19 Nov 2025 15:27:13 +0100 Subject: [PATCH 22/24] Refactor conflict message extraction in cherry-pick script - Replaced the previous method for extracting conflict messages with a new function that accurately matches file paths from git output, improving reliability. - Enhanced the handling of conflict messages to ensure both relative and absolute paths are considered, streamlining the conflict resolution process. - Removed redundant code related to conflict message extraction, simplifying the overall logic and improving maintainability. --- .github/scripts/cherry_pick.py | 159 --------------------------------- 1 file changed, 159 deletions(-) diff --git a/.github/scripts/cherry_pick.py b/.github/scripts/cherry_pick.py index 77b847e4da52..ce5623bba3d7 100755 --- a/.github/scripts/cherry_pick.py +++ b/.github/scripts/cherry_pick.py @@ -716,161 +716,6 @@ def _extract_conflict_messages(self, git_output: str) -> List[str]: return conflict_messages - def _add_missing_conflict_markers(self, status_lines: List[str], commit_sha: str) -> None: - """Add conflict markers for conflict types where Git doesn't generate them automatically - - This function handles conflicts where Git leaves files without conflict markers. - It adds standard Git conflict markers (<<<<<<<, =======, >>>>>>>) to make conflicts visible in PR diffs. - - Handled conflict types (where Git doesn't generate markers): - - DU (deleted by us, updated by them): File deleted in HEAD, modified in cherry-pick - - UD (updated by us, deleted by them): File modified in HEAD, deleted in cherry-pick - - AA (both added): File added in both branches with different content - - Note on multiple conflicts in the same file: - - If a file already has conflict markers (from a previous commit or Git-generated), - this function will skip it to avoid overwriting existing conflict information. - - When cherry-picking multiple commits, conflicts should be resolved sequentially: - resolve the first conflict, then continue with the next commit. - - To disable this feature, simply comment out or remove the call to this function - in _handle_cherry_pick_conflict. - - Args: - status_lines: List of git status lines (from 'git status --porcelain') - commit_sha: SHA of the commit being cherry-picked - """ - self.logger.debug(f"_add_missing_conflict_markers called with {len(status_lines)} status lines for commit {commit_sha[:7]}") - processed_files = set() # Track files we've already processed to avoid duplicates - for line in status_lines: - # Extract status code and file path - if len(line) < 3: - continue - - status_code = line[:2] - parts = line.split(None, 1) - if len(parts) < 2: - continue - - file_path = parts[1].strip() - if not file_path: - continue - - # Only process conflict types where Git doesn't generate markers by default - # DU, UD, AA: Git doesn't generate markers - # UU, AU, UA: Git generates markers, so we skip them - if status_code not in ('DU', 'UD', 'AA'): - continue - - # Skip if we've already processed this file in this call - if file_path in processed_files: - self.logger.debug(f"File {file_path} already processed in this call, skipping") - continue - - self.logger.debug(f"Processing {status_code} conflict for {file_path}") - - # Check if file already has conflict markers - existing_marker_line = self._find_first_conflict_line(file_path) - if existing_marker_line is not None: - # File already has conflict markers - this can happen if: - # 1. Git already generated markers for this conflict (UU, AU, UA) - # 2. We already added markers in a previous call - # 3. There are multiple conflicts in the same file from different commits - self.logger.debug(f"File {file_path} already has conflict markers at line {existing_marker_line}, skipping. " - f"This may indicate: (1) Git generated markers automatically, " - f"(2) Markers were added previously, or (3) Multiple conflicts in same file.") - processed_files.add(file_path) # Mark as processed even if we skip - continue # Markers already present, skip - - try: - # Get content from HEAD (if file exists there) - head_content = None - try: - result = subprocess.run( - ['git', 'show', f'HEAD:{file_path}'], - capture_output=True, - text=True, - check=True - ) - head_content = result.stdout - except subprocess.CalledProcessError: - # File doesn't exist in HEAD - head_content = None - - # Get content from cherry-pick commit (if file exists there) - cherry_pick_content = None - try: - result = subprocess.run( - ['git', 'show', f'{commit_sha}:{file_path}'], - capture_output=True, - text=True, - check=True - ) - cherry_pick_content = result.stdout - except subprocess.CalledProcessError: - # File doesn't exist in cherry-pick commit - cherry_pick_content = None - - # Build conflict markers based on status code - conflict_markers_content = None - - if status_code == 'DU': - # Deleted by us (HEAD), updated by them (cherry-pick) - # File exists in cherry-pick but not in HEAD - self.logger.debug(f"DU conflict: head_content={head_content is not None}, cherry_pick_content={cherry_pick_content is not None}") - if cherry_pick_content is not None: - # Format: <<<<<<< HEAD (empty) ======= (cherry-pick content) >>>>>>> - conflict_markers_content = f"<<<<<<< HEAD\n=======\n{cherry_pick_content}>>>>>>> {commit_sha[:7]}\n" - else: - self.logger.warning(f"DU conflict for {file_path}: cherry_pick_content is None, cannot add markers") - - elif status_code == 'UD': - # Updated by us (HEAD), deleted by them (cherry-pick) - # File exists in HEAD but not in cherry-pick - self.logger.debug(f"UD conflict: head_content={head_content is not None}, cherry_pick_content={cherry_pick_content is not None}") - if head_content is not None: - # Format: <<<<<<< HEAD (head content) ======= (empty) >>>>>>> - # Ensure head_content ends with newline if it doesn't - if head_content and not head_content.endswith('\n'): - head_content = head_content + '\n' - conflict_markers_content = f"<<<<<<< HEAD\n{head_content}=======\n>>>>>>> {commit_sha[:7]}\n" - else: - self.logger.warning(f"UD conflict for {file_path}: head_content is None, cannot add markers") - - elif status_code == 'AA': - # Both added - file added in both branches with different content - self.logger.debug(f"AA conflict: head_content={head_content is not None}, cherry_pick_content={cherry_pick_content is not None}") - if head_content is not None and cherry_pick_content is not None: - # Format: <<<<<<< HEAD (head content) ======= (cherry-pick content) >>>>>>> - # Ensure head_content ends with newline if it doesn't - if head_content and not head_content.endswith('\n'): - head_content = head_content + '\n' - conflict_markers_content = f"<<<<<<< HEAD\n{head_content}=======\n{cherry_pick_content}>>>>>>> {commit_sha[:7]}\n" - else: - self.logger.warning(f"AA conflict for {file_path}: head_content={head_content is not None}, cherry_pick_content={cherry_pick_content is not None}, cannot add markers") - - # Write conflict markers to file if we have content - if conflict_markers_content is not None: - # Ensure directory exists - file_dir = os.path.dirname(file_path) - if file_dir: - os.makedirs(file_dir, exist_ok=True) - - # For DU conflicts, file may already exist in working directory (Git left it there) - # We need to overwrite it with conflict markers - with open(file_path, 'w', encoding='utf-8') as f: - f.write(conflict_markers_content) - - self.logger.info(f"Added conflict markers to {file_path} for {status_code} conflict") - processed_files.add(file_path) # Mark as processed - else: - self.logger.warning(f"Could not generate conflict markers for {file_path} (status_code={status_code})") - processed_files.add(file_path) # Mark as processed even if we couldn't generate markers - - except Exception as e: - self.logger.warning(f"Failed to add conflict markers to {file_path}: {e}", exc_info=True) - processed_files.add(file_path) # Mark as processed even if we failed - def _handle_cherry_pick_conflict(self, commit_sha: str, git_output: str = ""): """Handle cherry-pick conflict: commit the conflict and return list of conflicted files with conflict messages @@ -894,10 +739,6 @@ def _handle_cherry_pick_conflict(self, commit_sha: str, git_output: str = ""): status_lines = result.stdout.strip().split('\n') self.logger.debug(f"Git status output: {result.stdout.strip()}") - # Add conflict markers for special conflict types (DU, UD, AA) where Git doesn't generate them - # To disable this feature, comment out the next line: - self._add_missing_conflict_markers(status_lines, commit_sha) - for line in status_lines: # Git status codes for conflicts: # UU: both modified (content conflict) From aa1ee21f43e1b3afbd03a57b2d318f8b3df5ce84 Mon Sep 17 00:00:00 2001 From: naspirato Date: Wed, 19 Nov 2025 15:27:25 +0100 Subject: [PATCH 23/24] Refactor conflict message extraction in cherry-pick script - Introduced a new method to find conflict messages for specific files, improving the accuracy of message matching from git output. - Simplified the conflict message extraction logic by consolidating redundant code, enhancing maintainability and readability. - Ensured compatibility with both relative and absolute file paths in conflict message matching, streamlining the conflict resolution process. --- .github/scripts/cherry_pick.py | 74 ++++++++++++++-------------------- 1 file changed, 31 insertions(+), 43 deletions(-) diff --git a/.github/scripts/cherry_pick.py b/.github/scripts/cherry_pick.py index ce5623bba3d7..e4220aff2bc2 100755 --- a/.github/scripts/cherry_pick.py +++ b/.github/scripts/cherry_pick.py @@ -716,6 +716,35 @@ def _extract_conflict_messages(self, git_output: str) -> List[str]: return conflict_messages + def _find_conflict_message_for_file(self, file_path: str, conflict_messages: List[str]) -> Optional[str]: + """Find matching conflict message for a file from git output + + Args: + file_path: Path to the conflicted file + conflict_messages: List of conflict messages from git output + + Returns: + Matching conflict message if found, None otherwise + """ + file_basename = os.path.basename(file_path) + for msg in conflict_messages: + # Git format: "CONFLICT (type): filename ..." + # Extract filename from message (first word after ": ") + if ': ' in msg: + msg_file_part = msg.split(': ', 1)[1] + # Extract filename (first word after ": ") + msg_filename = msg_file_part.split()[0] if msg_file_part.split() else "" + # Match if filename matches (handle both relative and absolute paths) + if msg_filename == file_path or msg_filename == file_basename: + return msg + # Fallback: check if file_path appears right after "CONFLICT (type): " + elif file_path in msg: + # Make sure it's not a substring match (e.g., "file.py" in "file.pyc") + # Check if file_path appears as a word boundary match + if re.search(rf'\b{re.escape(file_path)}\b', msg) or re.search(rf'\b{re.escape(file_basename)}\b', msg): + return msg + return None + def _handle_cherry_pick_conflict(self, commit_sha: str, git_output: str = ""): """Handle cherry-pick conflict: commit the conflict and return list of conflicted files with conflict messages @@ -760,28 +789,7 @@ def _handle_cherry_pick_conflict(self, commit_sha: str, git_output: str = ""): conflict_line = self._find_first_conflict_line(file_path) # Find matching conflict message from git output - # Git outputs messages like "CONFLICT (type): filename ..." - # We need to match the message that specifically mentions this file - conflict_message = None - file_basename = os.path.basename(file_path) - for msg in conflict_messages: - # Git format: "CONFLICT (type): filename ..." - # Extract filename from message (first word after ": ") - if ': ' in msg: - msg_file_part = msg.split(': ', 1)[1] - # Extract filename (first word after ": ") - msg_filename = msg_file_part.split()[0] if msg_file_part.split() else "" - # Match if filename matches (handle both relative and absolute paths) - if msg_filename == file_path or msg_filename == file_basename: - conflict_message = msg - break - # Fallback: check if file_path appears right after "CONFLICT (type): " - elif file_path in msg: - # Make sure it's not a substring match (e.g., "file.py" in "file.pyc") - # Check if file_path appears as a word boundary match - if re.search(rf'\b{re.escape(file_path)}\b', msg) or re.search(rf'\b{re.escape(file_basename)}\b', msg): - conflict_message = msg - break + conflict_message = self._find_conflict_message_for_file(file_path, conflict_messages) conflict_files.append((file_path, conflict_line, conflict_message)) @@ -803,27 +811,7 @@ def _handle_cherry_pick_conflict(self, commit_sha: str, git_output: str = ""): conflict_line = self._find_first_conflict_line(file_path) # Find matching conflict message - # Git outputs messages like "CONFLICT (type): filename ..." - conflict_message = None - file_basename = os.path.basename(file_path) - for msg in conflict_messages: - # Git format: "CONFLICT (type): filename ..." - # Extract filename from message (first word after ": ") - if ': ' in msg: - msg_file_part = msg.split(': ', 1)[1] - # Extract filename (first word after ": ") - msg_filename = msg_file_part.split()[0] if msg_file_part.split() else "" - # Match if filename matches (handle both relative and absolute paths) - if msg_filename == file_path or msg_filename == file_basename: - conflict_message = msg - break - # Fallback: check if file_path appears right after "CONFLICT (type): " - elif file_path in msg: - # Make sure it's not a substring match (e.g., "file.py" in "file.pyc") - # Check if file_path appears as a word boundary match - if re.search(rf'\b{re.escape(file_path)}\b', msg) or re.search(rf'\b{re.escape(file_basename)}\b', msg): - conflict_message = msg - break + conflict_message = self._find_conflict_message_for_file(file_path, conflict_messages) conflict_files.append((file_path, conflict_line, conflict_message)) except subprocess.CalledProcessError: From 0bb88bb69b0413d91495e7eab25a54a09a157807 Mon Sep 17 00:00:00 2001 From: naspirato Date: Thu, 20 Nov 2025 16:02:08 +0100 Subject: [PATCH 24/24] Improve conflict handling and logging in cherry-pick script - Added a warning log for unexpected clean working tree state after conflict detection, indicating potential auto-resolution or inconsistent state. - Updated the cherry-pick logic to use '--skip' instead of '--quit' when a commit is already in the target branch, enhancing the cleanup process during cherry-picking. --- .github/scripts/cherry_pick.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/scripts/cherry_pick.py b/.github/scripts/cherry_pick.py index e4220aff2bc2..5cc0af129cf3 100755 --- a/.github/scripts/cherry_pick.py +++ b/.github/scripts/cherry_pick.py @@ -831,6 +831,13 @@ def _handle_cherry_pick_conflict(self, commit_sha: str, git_output: str = ""): self.git_run("commit", "-m", f"BACKPORT-CONFLICT: manual resolution required for commit {commit_sha[:7]}") self.logger.info(f"Conflict committed for commit {commit_sha[:7]}") return True, conflict_files + else: + # Working tree is clean - this shouldn't happen if there's a real conflict + # If git status is empty, the conflict may have been auto-resolved or something else happened + # Return False to let the caller handle it (it will check for "nothing to commit" in the output) + self.logger.warning(f"Working tree is clean after conflict detection for commit {commit_sha[:7]}. " + f"This is unexpected - conflict may have been auto-resolved or state is inconsistent.") + return False, [] except Exception as e: self.logger.error(f"Error handling conflict for commit {commit_sha[:7]}: {e}") @@ -1045,7 +1052,7 @@ def create_pr_for_branch(self, target_branch): self.logger.info(f"Commit {commit_sha[:7]} changes is already in branch {target_branch}, skipping") # Quit cherry-pick operation to clean up state before next commit try: - self.git_run("cherry-pick", "--quit") + self.git_run("cherry-pick", "--skip") except subprocess.CalledProcessError: # If --quit fails, we're probably not in cherry-pick state, continue anyway pass