In [1]:
import logging
import os
import re
import shutil
import signal
import subprocess
import sys
import textwrap
import urllib.parse

import progressbar
import psutil
import toml

from typing import Tuple, Callable, Type, Any

logging.basicConfig(level=logging.INFO, format='%(asctime)s:%(threadName)s:%(message)s')

In [2]:
# Path to store package registry and the Git executable
packages_path = r'K:\julia_packages'
git = r'K:\Git\bin'
gnu_tools = r'K:\GnuUtils\bin'

os.environ['PATH'] = os.pathsep.join([git, gnu_tools] + os.environ['PATH'].split(os.pathsep))

In [3]:
def kill_proc_tree(pid: int, sig=signal.SIGTERM, include_parent=True,
                   timeout=None, on_terminate=None):
    if include_parent:
        assert pid != os.getpid(), "won't kill myself"
    parent = psutil.Process(pid)
    children = parent.children(recursive=True)
    if include_parent:
        children.append(parent)
    for p in children:
        p.send_signal(sig)
    gone, alive = psutil.wait_procs(children, timeout=timeout,
                                    callback=on_terminate)
    return (gone, alive)

def popen_send_signal(self, sig):
    if self.returncode is None:
        kill_proc_tree(self.pid, sig)
subprocess.Popen.send_signal = popen_send_signal

In [15]:
# Function to run commands with timeout protection
def run(*args, **kwargs):
    success = False
    while not success:
        try:
            print(f"Running {' '.join(args)} ({' '.join(f'{key}={val}' for key, val in kwargs.items())})",
                  file=sys.__stdout__)
            ret = subprocess.run(args, **kwargs)
            success = True
        except subprocess.TimeoutExpired:
            kill_proc_tree(os.getpid(), include_parent=False)
            print('*** Rerunning! ***', file=sys.__stdout__)
    return ret

In [5]:
def remove_pulls(repo_dir):
    refs = run('git', 'show-ref', cwd=repo_dir, capture_output=True).stdout.decode()
    update_ref = []
    for line in refs.splitlines():
        ref = line.split()[1]
        if 'pull' in ref:
            update_ref.append(f'delete {ref}')
    if update_ref:
        run('git', 'update-ref', '--stdin', input=('\n'.join(update_ref) + '\n').encode(), cwd=repo_dir)
        run('git', 'gc', '--auto', cwd=repo_dir, timeout=60)

In [6]:
# Set the URL to the Julia default package registry and clone/fetch the latest registry
reg_url = 'https://github.com/JuliaRegistries/General.git'
reg_path = os.path.join(packages_path, 'General')
if os.path.exists(reg_path):
    run('git', 'pull', cwd=reg_path)
else:
    run('git', 'clone', reg_url, reg_path)

In [7]:
# Set the URL to the Julia METADATA repository and clone/fetch the latest repo
metadata_url = 'https://github.com/JuliaLang/METADATA.jl.git'
metadata_path = os.path.join(packages_path, 'METADATA.jl.git')
if not os.path.exists(metadata_path):
    run('git', 'clone', '--mirror', metadata_url, metadata_path)
else:
    run('git', 'fetch', cwd=metadata_path, timeout=60)
remove_pulls(metadata_path)

In [8]:
# Parse the registry TOML file into a really big dictionary
with open(os.path.join(reg_path, 'Registry.toml')) as infile:
    registry_data = toml.load(infile)

In [9]:
# Walk through the TOML dictionary, grabbing the package names and their Git repo URLs
pkg_download = {}
for pkg in sorted(registry_data['packages'].values(), key=lambda x: x['name']):
    with open(os.path.join(reg_path, pkg['path'], 'Package.toml')) as infile:
        pkg_toml = toml.load(infile)
    url = urllib.parse.urlparse(pkg_toml['repo'])
    url = url._replace(scheme='https', netloc=':@' + url.netloc)
    pkg_repo = url.geturl()
    pkg_download[pkg_toml['name']] = pkg_repo

In [None]:
# Loop over the package URLs, cloning or fetching on each one
for pkg_repo in progressbar.progressbar(pkg_download.values()):
    pkg_repo_base = os.path.basename(pkg_repo)
    pkg_path = os.path.join(packages_path, pkg_repo_base)
    if not os.path.exists(pkg_path):
        proc = run('git', 'clone', '--mirror', pkg_repo, pkg_path, timeout=120)
    else:
        proc = run('git', 'fetch', cwd=pkg_path, timeout=10)
    if proc.returncode == 0:
        remove_pulls(pkg_path)

  3% (91 of 2736) |                      | Elapsed Time: 0:04:26 ETA:   2:23:39

In [18]:
def onerror(func: Callable, path: str, exc_info: Tuple[Type, Exception, Any]):
    
    """Error handler for ``shutil.rmtree``.

    If the error is due to an access error (read only file)
    it attempts to add write permission and then retries.

    If the error is for another reason it re-raises the error.

    Usage : ``shutil.rmtree(path, onerror=onerror)``
    
    """
    
    import stat
    if not os.access(path, os.W_OK):
        # Is the error an access error ?
        os.chmod(path, stat.S_IWUSR)
        func(path)
    else:
        raise

In [19]:
# The Julia language repo needs to download some dependencies when it's built.
# This is normally done using "make -d deps getall", but we don't have GNU make
# on the IAS. So we have to walk the Makefiles ourselves and manuall download
# the dependencies so we can ingress them, too!

# Clone Julia from the bare repo
julia_repo = os.path.join(packages_path, 'julia.git')
julia_path = os.path.join(packages_path, 'julia')
if os.path.exists(julia_path):
    shutil.rmtree(julia_path, onerror=onerror)
run('git', 'clone', julia_repo, julia_path)
rev = run('git', 'rev-list', '--tags', '--max-count=1', capture_output=True).stdout.decode().strip()
branch = run('git', 'describe', '--tags', rev, capture_output=True).stdout.decode().strip()
run('git', 'checkout', branch)

TypeError: __init__() got an unexpected keyword argument 'capture_output'

In [None]:
# Run GNU make to get the dependencies
run('make', '-C', 'deps', 'getall', 'ARCH=x86_64', cwd=julia_path)

In [None]:
# Zip up the directory to transfer to NNPP
topdir = os.path.realpath(os.path.dirname(packages_path))
basename = os.path.basename(packages_path)
run('tar', 'czf', f'{basename}.tar.gz', basename, cwd=topdir)