Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
158 changes: 157 additions & 1 deletion src/vcspull/cli/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@
from __future__ import annotations

import logging
import subprocess
import sys
import typing as t
from copy import deepcopy

from libvcs import exc as libvcs_exc
from libvcs._internal.shortcuts import create_project
from libvcs.url import registry as url_tools

Expand All @@ -23,6 +25,22 @@

log = logging.getLogger(__name__)

BRANCH_ERROR_MSG = """
⚠️ Error syncing '{repo_name}': Branch issue detected

The repository appears to be on a local branch with no remote tracking.

To fix this, add a 'rev' field to your vcspull configuration:

{repo_name}:
repo: {repo_url}
rev: main # or specify your desired branch

Alternatively, switch to a branch that exists remotely:
cd {repo_path}
git checkout main # or another remote branch{available_branches}
"""


def clamp(n: int, _min: int, _max: int) -> int:
"""Clamp a number between a min and max value."""
Expand Down Expand Up @@ -142,6 +160,130 @@ def __init__(self, repo_url: str, *args: object, **kwargs: object) -> None:
return super().__init__(f"Could not automatically determine VCS for {repo_url}")


def check_branch_state(repo_dict: dict[str, t.Any]) -> bool:
"""Check if repository is in a problematic branch state.

Returns True if there are potential branch issues that should be addressed
before attempting to sync.

Parameters
----------
repo_dict : dict
Repository configuration dictionary

Returns
-------
bool
True if branch issues detected, False otherwise
"""
repo_path = repo_dict.get("path")
if not repo_path:
return False

try:
# Check if we're in a git repository
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

issue (code-quality): We've found these issues:

git_dir = subprocess.run(
["git", "-C", str(repo_path), "rev-parse", "--git-dir"],
capture_output=True,
text=True,
timeout=5,
check=False,
)
if git_dir.returncode != 0:
return False # Not a git repo or git not available

# Get current branch
current_branch = subprocess.run(
["git", "-C", str(repo_path), "branch", "--show-current"],
capture_output=True,
text=True,
timeout=5,
check=False,
)
if current_branch.returncode != 0:
return False # Can't determine current branch

branch_name = current_branch.stdout.strip()
if not branch_name:
return False # Detached HEAD or other edge case

# Check if current branch has upstream tracking
upstream_ref = f"{branch_name}@{{upstream}}"
upstream = subprocess.run(
["git", "-C", str(repo_path), "rev-parse", "--abbrev-ref", upstream_ref],
capture_output=True,
text=True,
timeout=5,
check=False,
)

if upstream.returncode != 0:
# No upstream tracking - this is potentially problematic
# Even if the remote branch exists, the lack of upstream tracking
# can cause libvcs to construct invalid remote references
# Check if there's a remote branch with the same name
remote_ref = f"origin/{branch_name}"
remote_branch = subprocess.run(
Comment on lines +225 to +226
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion: Assumes 'origin' as remote; may not be correct for all repos.

This approach may fail in repositories where the remote is not named 'origin'. Allow configuring the remote name or detect it automatically.

Suggested implementation:

            # Even if the remote branch exists, the lack of upstream tracking
            # can cause libvcs to construct invalid remote references
            # Check if there's a remote branch with the same name

            # Detect remote name automatically
            remote_name_proc = subprocess.run(
                ["git", "-C", str(repo_path), "remote"],
                capture_output=True,
                text=True,
                timeout=5,
                check=False,
            )
            remote_name = "origin"
            if remote_name_proc.returncode == 0:
                remotes = remote_name_proc.stdout.strip().splitlines()
                if remotes:
                    remote_name = remotes[0]  # Use the first remote found

            remote_ref = f"{remote_name}/{branch_name}"
            remote_branch = subprocess.run(
                ["git", "-C", str(repo_path), "rev-parse", "--verify", remote_ref],
                capture_output=True,

If you want to allow the remote name to be configured (e.g., via a function argument or environment variable), you should:

  1. Add a parameter to the relevant function to accept remote_name, defaulting to None.
  2. If remote_name is provided, use it; otherwise, detect as above.
  3. Optionally, check for an environment variable (e.g., VCS_REMOTE_NAME) before falling back to detection.

You may need to propagate this parameter through function calls or configuration.

["git", "-C", str(repo_path), "rev-parse", "--verify", remote_ref],
capture_output=True,
text=True,
timeout=5,
check=False,
)
if remote_branch.returncode != 0:
# Current branch doesn't exist on remote - this will cause issues
return True
# Branch exists on remote but no upstream tracking
# This can still cause issues with libvcs's remote name detection
return True
else:
return False # Branch state looks good

except (subprocess.TimeoutExpired, FileNotFoundError):
return False # Git command failed or not available


def handle_branch_error(repo_dict: dict[str, t.Any]) -> None:
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion: Consider logging errors instead of only printing to stderr.

Using the logger will improve error traceability and make integration with monitoring tools easier.

Suggested implementation:

import logging
import subprocess
import sys
import typing as t
from copy import deepcopy

logger = logging.getLogger(__name__)
def handle_branch_error(repo_dict: dict[str, t.Any]) -> None:
    """Handle branch-related errors by showing helpful instructions.

    logger.error(
        "Branch-related error detected for repository: %s\n"
        "Please check your branch configuration and ensure upstream tracking is set correctly.",
        repo_dict.get("name", "<unknown>")
    )

"""Handle branch-related errors by showing helpful instructions.

Parameters
----------
repo_dict : dict
Repository configuration dictionary
"""
repo_name = repo_dict.get("name", "repository")
repo_url = repo_dict.get("url", "YOUR_REPO_URL")
repo_path = repo_dict.get("path", "REPO_PATH")

# Try to get available remote branches
available_branches = ""
try:
cmd = ["git", "-C", str(repo_path), "branch", "-r"]
result = subprocess.run(
cmd, capture_output=True, text=True, timeout=5, check=False
)
Comment on lines +262 to +264
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security (python.lang.security.audit.dangerous-subprocess-use-audit): Detected subprocess function 'run' without a static string. If this data can be controlled by a malicious actor, it may be an instance of command injection. Audit the use of this call to ensure it is not controllable by an external resource. You may consider using 'shlex.escape()'.

Source: opengrep

if result.returncode == 0 and result.stdout:
branches = [
line.strip().replace("origin/", "")
for line in result.stdout.splitlines()
if "origin/" in line and "->" not in line
][:5] # Show first 5 branches
Comment on lines +266 to +270
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

issue (code-quality): Use named expression to simplify assignment and conditional (use-named-expression)

if branches:
branch_list = ", ".join(branches)
available_branches = f"\n\nAvailable remote branches: {branch_list}"
except (subprocess.TimeoutExpired, FileNotFoundError):
pass # Git command failed or not available

error_msg = BRANCH_ERROR_MSG.format(
repo_name=repo_name,
repo_url=repo_url,
repo_path=repo_path,
available_branches=available_branches,
)

print(error_msg, file=sys.stderr)


def update_repo(
repo_dict: t.Any,
# repo_dict: Dict[str, Union[str, Dict[str, GitRemote], pathlib.Path]]
Expand All @@ -162,7 +304,21 @@ def update_repo(
repo_dict["vcs"] = vcs

r = create_project(**repo_dict) # Creates the repo object
r.update_repo(set_remotes=True) # Creates repo if not exists and fetches

# Check for branch issues before attempting to sync
if check_branch_state(repo_dict):
handle_branch_error(repo_dict)
# Still attempt the sync but user has been warned

try:
r.update_repo(set_remotes=True) # Creates repo if not exists and fetches
except libvcs_exc.CommandError as e:
error_msg = str(e).lower()
# Check for branch-related errors (fallback in case pre-check missed something)
if "invalid upstream" in error_msg or "ambiguous argument" in error_msg:
handle_branch_error(repo_dict)
raise
raise

# TODO: Fix this
return r # type:ignore
Loading