In [2]:
import logging
import os
import re
import shutil
import subprocess
import textwrap

from multiprocessing.pool import Pool, ThreadPool
from typing import Any, Callable, Optional, Tuple, Type

import toml

# Emit INFO-and-above records, each tagged with a timestamp and the name of
# the thread that produced it.
logging.basicConfig(
    format='%(asctime)s:%(threadName)s:%(message)s',
    level=logging.INFO,
)

In [3]:
# Path to store package registry and the Git executable.
# Both default to the original locations but can be overridden through
# environment variables, so the notebook can run on another machine without
# editing this cell.
packages_path = os.environ.get('JULIA_MIRROR_PACKAGES_PATH', 'K:\\julia_packages2')
git = os.environ.get('JULIA_MIRROR_GIT', 'K:\\Git\\bin\\git.exe')

In [4]:
# Set the URL to the Julia default package registry and clone/pull the latest registry
reg_url = 'https://github.com/JuliaRegistries/General.git'
reg_path = os.path.join(packages_path, 'General')
if os.path.exists(reg_path):
    # A failed pull is not fatal (the existing checkout is still usable),
    # but it must not go unnoticed: the package list may be stale.
    if subprocess.run([git, 'pull'], cwd=reg_path).returncode != 0:
        logging.warning('git pull failed in %s; continuing with the existing registry checkout',
                        reg_path)
else:
    # Nothing downstream can work without a registry, so fail right here
    # instead of with a confusing FileNotFoundError in the next cell.
    subprocess.run([git, 'clone', reg_url, reg_path], check=True)

In [5]:
# Parse the registry TOML file into a really big dictionary.
# TOML documents are UTF-8 by specification, so the encoding is given
# explicitly instead of relying on the platform default.
with open(os.path.join(reg_path, 'Registry.toml'), encoding='utf-8') as infile:
    registry_data = toml.load(infile)

In [6]:
# Walk through the TOML dictionary, grabbing the package names and their Git repo URLs
pkg_download = {}
for pkg in sorted(registry_data['packages'].values(), key=lambda x: x['name']):
    # TOML is UTF-8 by specification; don't rely on the platform default encoding
    with open(os.path.join(reg_path, pkg['path'], 'Package.toml'), encoding='utf-8') as infile:
        pkg_toml = toml.load(infile)
    # Rewrite git:// URLs to https:// before storing them
    pkg_repo = pkg_toml['repo'].replace('git://', 'https://')
    pkg_download[pkg_toml['name']] = pkg_repo

In [7]:
def clone_mirror(pkg_repo: str, packages_path: str = packages_path,
                 redo: bool = False) -> Tuple[str, Optional[int]]:

    """Performs a mirror Git clone of a repository.

    The local directory name is the last path component of `pkg_repo`
    (trailing slashes are ignored).

    Args:
        pkg_repo: The URL of the Git repository to clone.
        packages_path: The path to the local directory to clone into.
        redo: If a clone exists, delete it and re-clone, otherwise do nothing.

    Returns:
        pkg_repo: The input `pkg_repo` parameter.
        ret: The return code from Git, or ``None`` if Git was not run
            (`redo` was ``False`` and the clone existed, or no directory
            name could be derived from `pkg_repo`).

    """

    import stat

    def _retry_writable(func, path, exc_info):
        # rmtree error handler: clear the read-only flag on files that refuse
        # to be deleted and retry; anything else is re-raised unchanged.
        if isinstance(exc_info[1], PermissionError):
            os.chmod(path, stat.S_IWRITE)
            func(path)
        else:
            raise exc_info[1]

    # Strip trailing separators so ".../Foo.jl.git/" still yields "Foo.jl.git".
    pkg_repo_base = os.path.basename(pkg_repo.rstrip('/\\'))
    if not pkg_repo_base:
        # With an empty name pkg_path would be packages_path itself, and
        # redo=True would then delete every mirror. Refuse instead.
        logging.error('Cannot derive a repository name from %r; skipping', pkg_repo)
        return pkg_repo, None
    pkg_path = os.path.join(packages_path, pkg_repo_base)

    ret = None

    if redo:
        try:
            shutil.rmtree(pkg_path, onerror=_retry_writable)
        except FileNotFoundError:
            # Nothing to delete; fall through to a fresh clone
            pass

    if not os.path.exists(pkg_path):
        logging.info('Cloning %s from %s', pkg_repo_base, pkg_repo)
        ret = subprocess.run([git, 'clone', '--mirror', pkg_repo, pkg_path]).returncode

    return pkg_repo, ret

In [None]:
# Concurrent loop over the package URLs, cloning each one and storing the return codes in a dictionary.
# A thread pool is used rather than a process pool: the real work happens in
# the git subprocesses, and worker *processes* cannot import functions that
# were defined interactively in a notebook when the "spawn" start method is in
# effect (the default on Windows).
with ThreadPool() as pool:
    results = dict(pool.imap(clone_mirror, pkg_download.values()))

In [None]:
# Loop through the return codes. If a code was nonzero, try to clone again!
# The retry outcome is written back to `results` and anything that still did
# not succeed is logged, so permanent failures are visible.
for pkg_repo, ret in list(results.items()):
    if ret is not None and ret != 0:
        retry_ret = clone_mirror(pkg_repo)[1]
        results[pkg_repo] = retry_ret
        if retry_ret != 0:
            logging.warning('Clone of %s did not succeed on retry (result: %s)',
                            pkg_repo, retry_ret)

In [None]:
def fetch(pkg_repo: str, packages_path: str = packages_path) -> Tuple[str, Optional[int]]:

    """Performs a fetch in a local Git repository, whose location is
    determined from the remote URL.

    The local directory name is the last path component of `pkg_repo`
    (trailing slashes are ignored).

    Args:
        pkg_repo: The URL of the remote Git repository.
        packages_path: The path to the local directory where the clone is located.

    Returns:
        pkg_repo: The input `pkg_repo` parameter.
        ret: The return code from Git, or ``None`` if Git was not run (the
            clone doesn't exist, or no directory name could be derived from
            `pkg_repo`).

    """

    # Strip trailing separators so ".../Foo.jl.git/" still yields "Foo.jl.git".
    pkg_repo_base = os.path.basename(pkg_repo.rstrip('/\\'))
    if not pkg_repo_base:
        # An empty name would make pkg_path the packages root itself, which
        # is not a clone; skip rather than run git there.
        logging.error('Cannot derive a repository name from %r; skipping', pkg_repo)
        return pkg_repo, None
    pkg_path = os.path.join(packages_path, pkg_repo_base)

    ret = None

    if os.path.exists(pkg_path):
        logging.info('Fetching in %s', pkg_path)
        ret = subprocess.run([git, 'fetch'], cwd=pkg_path).returncode

    return pkg_repo, ret

In [None]:
# Concurrent loop over the package URLs, fetching on each one to ensure we have the latest and the clone is good.
# A thread pool is used rather than a process pool: the real work happens in
# the git subprocesses, and worker *processes* cannot import functions that
# were defined interactively in a notebook when the "spawn" start method is in
# effect (the default on Windows).
with ThreadPool() as pool:
    results = dict(pool.imap(fetch, pkg_download.values()))

In [None]:
# Loop through the return codes. If a code was nonzero, try to fetch again!
# Anything that fails at this point is either an invalid URL or we have bad creds!
# The retry outcome is written back to `results` and logged, so those
# repositories can actually be identified.
for pkg_repo, ret in list(results.items()):
    if ret is not None and ret != 0:
        retry_ret = fetch(pkg_repo)[1]
        results[pkg_repo] = retry_ret
        if retry_ret != 0:
            logging.warning('Fetch for %s did not succeed on retry (result: %s)',
                            pkg_repo, retry_ret)

In [None]:
def onerror(func: Callable, path: str, exc_info: Tuple[Type, Exception, Any]) -> None:

    """Error handler for ``shutil.rmtree``.

    If the path is not writable (e.g. a read-only file), write permission
    is added and the failed operation is retried.

    If the path is writable, the failure had another cause and the original
    exception is re-raised.

    Args:
        func: The function that failed; it is called again with `path`.
        path: The path `func` was operating on.
        exc_info: The ``(type, value, traceback)`` triple describing the failure.

    Raises:
        Exception: ``exc_info[1]``, when `path` is already writable.

    Usage : ``shutil.rmtree(path, onerror=onerror)``

    """

    import stat
    if not os.access(path, os.W_OK):
        # Is the error an access error ?
        os.chmod(path, stat.S_IWUSR)
        func(path)
    else:
        # Re-raise the reported exception explicitly. A bare ``raise`` only
        # works while the caller is inside an ``except`` block and otherwise
        # fails with "No active exception to reraise".
        raise exc_info[1]

In [None]:
# The Julia language repo needs to download some dependencies when it's built.
# This is normally done using "make -d deps getall", but we don't have GNU make
# on the IAS. So we have to walk the Makefiles ourselves and manually download
# the dependencies so we can ingress them, too!

# Clone Julia from the bare repo
julia_repo = os.path.join(packages_path, 'julia.git')
julia_path = os.path.join(packages_path, 'julia')
if os.path.exists(julia_path):
    # Start from a clean working tree; `onerror` deals with read-only files
    shutil.rmtree(julia_path, onerror=onerror)
# check=True: the following steps need this working tree, so stop here if
# the clone fails rather than hitting a less obvious error later.
subprocess.run([git, 'clone', julia_repo, julia_path], check=True)

In [None]:
# Run GNU make to get the dependencies
pwd = os.getcwd()
os.chdir(os.path.join(julia_path, 'deps'))
try:
    ret = os.system("make getall")
finally:
    # Always restore the working directory, even if the cell is interrupted
    # while make is running. Otherwise the kernel stays inside the Julia
    # tree and later relative paths silently point somewhere else.
    os.chdir(pwd)
if ret != 0:
    raise RuntimeError("Getting Julia dependencies failed!")

In [None]:
# Zip up the directory to transfer to NNPP
topdir = os.path.realpath(os.path.dirname(packages_path))
# Archive the directory that `packages_path` actually names, instead of a
# hard-coded folder name that can drift out of sync with the configuration.
pkg_dirname = os.path.basename(os.path.normpath(packages_path))
# check=True so a failed or partial archive is not mistaken for a good one
subprocess.run(['tar', 'czf', 'julia_packages.tar.gz', pkg_dirname], cwd=topdir, check=True)