## Goal
Here we try to estimate how many errors we made while guessing the right commit for certain flatpak dependencies. More precisely, some dependencies (base app, SDK extensions and flatpak-builder) are not pinned to a specific commit; we only know that the program was built using the latest dependencies available at the time. The problem is that we don't know the exact time of the build, so we take what was the latest available at ostree-commit time (after the build). This means that if a dependency was updated during this time slice, our guess may be wrong.

To estimate the probability of error, we can already bound the build time between the git commit (which is known to be before the build) and the ostree commit (which is after it). We then assume that the build time is uniformly distributed inside this interval; this is a bit unrealistic, but we cannot really guess what distribution it should have. Then, if one of the program's dependencies had a commit in this interval, we can easily compute the probability of error. If multiple dependencies are present, we compute the joint error (the probability that at least one is wrong); since all dependencies share the same build time, this is simply the largest of the individual error probabilities.

In [1]:
%matplotlib notebook

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.widgets import Slider, Button
import json
import os
from helpers import run_flatpak_command, cmd_output_to_dict, flatpak_date_to_datetime, get_additional_deps, GitNotFoundException, find_build_manifest
import subprocess
import shutil
from git.repo import Repo
import datetime as dt
from datetime import datetime, timezone
import yaml
from functools import reduce
import json5

### You can skip this and load the json directly if you want

In [2]:
# Compute sdk extension, base app and builder statistics.
# `results` maps every ref listed in the 'extension-base-builder' file to the
# list of dates of its flathub commits, as reported by `flatpak remote-info --log`.
results = {}
with open('extension-base-builder', 'r') as f:
    # Iterate over the file lazily, one ref per line.
    for name in f:
        name = name.strip()
        if not name:
            # A blank line is not a ref: do not query flatpak for an empty name
            # (which would also leave a bogus '' entry in `results`).
            continue
        cmd = ['flatpak', 'remote-info', 'flathub', name, '--log']
        output = run_flatpak_command(cmd, 'user', capture_output=True)
        # The output is split on blank lines and the first chunk is dropped.
        # NOTE(review): presumably the first chunk is the ref header and every
        # following chunk describes one commit - confirm against the helper.
        commits = output.split("\n\n")[1:]
        results[name] = []
        # The chunks are visited in reverse order of the log output.
        # NOTE(review): this assumes the log is newest-first, so that dates end
        # up oldest-first, which is what the error computation expects.
        for commit in reversed(commits):
            # Skip empty / whitespace-only chunks.
            if commit.strip():
                commit = cmd_output_to_dict(commit)
                commit_date = flatpak_date_to_datetime(commit["Date"])
                results[name].append(commit_date)

In [3]:
def datetime_json(o):
    """Serialise ``datetime`` objects for ``json.dumps(..., default=datetime_json)``.

    Args:
        o: The object the JSON encoder could not serialise by itself.

    Returns:
        ``str(o)`` when ``o`` is a ``datetime``.

    Raises:
        TypeError: For any other type.  Returning ``None`` here (as an implicit
            fall-through would) makes ``json.dumps`` silently write ``null``
            instead of reporting the unsupported object.
    """
    if isinstance(o, datetime):
        return str(o)
    raise TypeError(f"Object of type {type(o).__name__} is not JSON serializable")

In [4]:
# Render the collected commit dates as indented JSON; datetimes are converted
# by `datetime_json`.
json_results = json.dumps(
    results,
    indent=4,
    default=datetime_json,
)

In [5]:
# Persist the JSON so the (slow) flatpak queries do not have to be repeated.
with open("commit_dates", "w") as commit_dates_file:
    commit_dates_file.write(json_results)

### Skip until here to load the precomputed json

In [6]:
# Load the dependency commit dates (ref -> list of date strings) from disk.
with open("commit_dates", "r") as commit_dates_file:
    deps_commits = json5.loads(commit_dates_file.read())

In [7]:
def compute_error_prob_for_1_deps(git_commit, ostree_commit, deps_ref, commits_by_ref=None):
    """Estimate the probability that the commit guessed for one dependency is wrong.

    The build is assumed to have happened at a time uniformly distributed
    between ``git_commit`` and ``ostree_commit``.  The guess is the latest
    commit of the dependency at ``ostree_commit`` time, so it is wrong whenever
    the build happened before the last dependency commit of that interval.

    Args:
        git_commit: Timezone-aware datetime, lower bound of the build time.
        ostree_commit: Timezone-aware datetime, upper bound of the build time.
        deps_ref: Ref of the dependency, used as key into the commit history.
        commits_by_ref: Optional mapping ``ref -> list of commit dates``; each
            date is either a ``datetime`` or a ``'%Y-%m-%d %H:%M:%S%z'``
            string.  Defaults to the module-level ``deps_commits``.

    Returns:
        A float in ``[0, 1]``.  ``0.0`` when the dependency was not updated
        inside the interval, when ``git_commit`` predates the whole history, or
        when the interval is empty or inverted (no estimate can be made).

    Raises:
        KeyError: If ``deps_ref`` has no recorded history.
        IndexError: If the recorded history is empty.
    """
    if commits_by_ref is None:
        commits_by_ref = deps_commits

    # Sorting removes any dependency on the order in which dates were stored.
    history = sorted(
        c if isinstance(c, datetime) else datetime.strptime(c, '%Y-%m-%d %H:%M:%S%z')
        for c in commits_by_ref[deps_ref]
    )

    # There are cases of really outdated programs which existed even before certain deps.
    if git_commit < history[0]:
        print("Too old to even be relevent")
        return 0.0

    window = (ostree_commit - git_commit).total_seconds()
    if window <= 0:
        # Empty or inverted interval: nothing can be estimated (and the
        # division below would be meaningless).
        return 0.0

    # A commit made exactly at git_commit time was already available for the
    # build, hence the strict lower bound.
    updates = [c for c in history if git_commit < c <= ostree_commit]
    if not updates:
        return 0.0

    # Since we took the ostree_commit to rebuild, the last possible one is the relevant one
    last_commit = updates[-1]
    # We assume uniform distribution: the guess is right only if the build
    # happened after last_commit.
    return 1. - (ostree_commit - last_commit).total_seconds() / window

In [8]:
# Sanity check
builder_ref = 'org.flatpak.Builder/x86_64/stable'
builder_last_commit = datetime.strptime(deps_commits[builder_ref][-1], '%Y-%m-%d %H:%M:%S%z')
before = builder_last_commit - dt.timedelta(seconds= 9)
after = builder_last_commit + dt.timedelta(seconds= 1)
error = compute_error_prob_for_1_deps(before, after, builder_ref)
print(error)

0.9


In [9]:
def find_extension_branch(ref):
    """Return the first known version string found in the metadata of *ref*.

    Runs ``flatpak remote-info flathub <ref> -m --system`` and looks for each
    known version, in order, anywhere in the command's standard output.

    Args:
        ref: The flatpak ref whose metadata is queried.

    Returns:
        The first known version that occurs in the output, or ``None`` when
        none of them does (including when the command printed nothing).
    """
    metadata_cmd = ['flatpak', 'remote-info', 'flathub', ref, '-m', '--system']
    completed = subprocess.run(metadata_cmd, capture_output=True)
    metadata = completed.stdout.decode('UTF-8')
    known_versions = ('1.6', '18.08', '19.08', '20.08', '21.08')
    # NOTE(review): this is a plain substring search over the whole output, so
    # an unrelated occurrence of e.g. "1.6" would also match - confirm this is
    # acceptable for the metadata format.
    return next(
        (candidate for candidate in known_versions if candidate in metadata),
        None,
    )

In [10]:
# Sanity check
gnome_platform_ref = 'org.gnome.Sdk/x86_64/3.36'
version = find_extension_branch(gnome_platform_ref)
print(version)

19.08


In [11]:
# List the refs of all flathub apps; `cut` keeps fields 2 to 4 of each
# '/'-separated ref.  The pipe is why the command goes through a shell.
cmd = "flatpak remote-ls flathub --system --app --columns=ref | cut -d '/' -f2,3,4"
result = subprocess.run(cmd, shell=True, capture_output=True)
output = str(result.stdout, 'UTF-8')

In [12]:
# Accumulators filled while scanning every flathub app:
non_zero_error = dict()   # ref -> probability that a guessed dependency is wrong
failed = []               # refs for which the analysis raised an exception
badly_maintained = []     # refs whose last git commit is newer than the flathub build

### This step can also take a long time; some precomputed results are available

In [13]:
# For every app listed on flathub: clone its packaging repository, compare the
# date of its last git commit with the date of its last flathub (ostree)
# commit, and estimate the probability that at least one of the guessed
# dependencies (sdk extensions, base app, flatpak-builder) was updated between
# the two.
for ref in output.split('\n')[:-1]:
    # Not named `dir`: that would shadow the builtin for the rest of the session.
    checkout_dir = ref.replace('/', '_')

    os.mkdir(checkout_dir)
    path = f"{os.curdir}/{checkout_dir}"
    splitted_ref = ref.split('/')
    name = splitted_ref[0]
    branch = splitted_ref[2]
    # Reset on every iteration so `finally` never touches a previous app's clone.
    repo = None
    try:
        git_url = get_additional_deps("flathub", name)

        repo = Repo.clone_from(git_url, path)
        remote_refs = repo.remote().refs
        # Prefer the git branch named after the flatpak branch, then fall back
        # to master; if neither exists, stay on whatever the clone checked out.
        for wanted_suffix in (branch, "master"):
            possible_ones = [
                remote_ref for remote_ref in remote_refs
                if remote_ref.name.endswith(wanted_suffix)
            ]
            if possible_ones:
                git_ref = possible_ones[0]
                git_ref.checkout()
                break

        manifest = find_build_manifest(os.listdir(path), name)

        last_commit_date = repo.commit().committed_datetime
        cmd = ['flatpak', 'remote-info', 'flathub', ref, '--system']
        last_ostree_commit_date = cmd_output_to_dict(subprocess.run(cmd, capture_output=True).stdout.decode('UTF-8'))
        last_ostree_commit_date = flatpak_date_to_datetime(last_ostree_commit_date['Date'])
        estimate_interval = last_ostree_commit_date - last_commit_date
        estimate_interval = estimate_interval.total_seconds()

        # During our first rebuild we assumed that the last git commit would be the one corresponding to what's
        # on flathub, but since people push code that does not build on master this is wrong so here we
        # keep track of all this badly maintained programs.
        # Also we make sure the commit is not too recent (less than 24 hours old), otherwise the build might
        # not have been already available on flathub
        too_recent = (datetime.now(timezone.utc) - last_commit_date).total_seconds() <= 86400
        if last_ostree_commit_date < last_commit_date and not too_recent:
            badly_maintained.append(ref)
            print(f"shame on {ref} it does not even build")
        elif not too_recent:
            with open(f"{path}/{manifest}", "r") as f:
                try:
                    if manifest.endswith(".json"):
                        manifest = json5.load(f)
                    else:
                        manifest = yaml.safe_load(f)

                    version = find_extension_branch(f"{manifest['sdk']}/x86_64/{manifest['runtime-version']}")
                    guessed_deps = [f"{ext}/x86_64/{version}" for ext in manifest.get('sdk-extensions', [])] + ['org.flatpak.Builder/x86_64/stable']
                    base_app = manifest.get('base')
                    if base_app:
                        guessed_deps.append(f"{base_app}/x86_64/{manifest['base-version']}")
                except Exception:
                    # Sometimes the manifest parsing fails (because people don't care about specifications);
                    # fall back to the only dependency every build has.  `Exception` rather than a bare
                    # `except` so that Ctrl-C still interrupts this long-running loop.
                    guessed_deps = ['org.flatpak.Builder/x86_64/stable']

            errors = [compute_error_prob_for_1_deps(last_commit_date, last_ostree_commit_date, dep) for dep in guessed_deps]
            # All dependencies share the same (unknown) build time, so "at least one guess is wrong"
            # happens exactly when the most likely wrong guess is wrong: the joint error is the maximum.
            error = max(errors)

            if error != 0.0:
                print(errors)
                print(error)
                print(guessed_deps)
                non_zero_error[ref] = error
                print(f"Ref {ref} used wrong dependency with probability {error}")
    except Exception as e:
        print(e)
        failed.append(ref)
        print(f"Ref {ref} failed")
    finally:
        # Release the git resources held by the clone before deleting it;
        # otherwise they pile up over the many repositories visited here.
        if repo is not None:
            repo.close()
        shutil.rmtree(path)

[0.49150141643059486]
0.49150141643059486
['org.flatpak.Builder/x86_64/stable']
Ref ca.littlesvr.asunder/x86_64/stable used wrong dependency with probability 0.49150141643059486
[0.14316835755267798, 0.5957999704996646, 0.7280773210250182]
0.7280773210250182
['org.freedesktop.Sdk.Extension.node14/x86_64/21.08', 'org.flatpak.Builder/x86_64/stable', 'org.electronjs.Electron2.BaseApp/x86_64/21.08']
Ref ch.threema.threema-web-desktop/x86_64/stable used wrong dependency with probability 0.7280773210250182
Too old to even be relevent
shame on com.dangeredwolf.ModernDeck/x86_64/stable it does not even build
Too old to even be relevent
shame on com.github.Anuken.Mindustry/x86_64/stable it does not even build
shame on com.github.alainm23.planner/x86_64/stable it does not even build
[0.7677506607201849]
0.7677506607201849
['org.flatpak.Builder/x86_64/stable']
Ref com.github.arminstraub.krop/x86_64/stable used wrong dependency with probability 0.7677506607201849
[0.5198434125269978]
0.51984341252

No git repository found for package: org.freedesktop.Platform.VulkanInfo
Ref org.freedesktop.Platform.VulkanInfo/x86_64/20.08 failed
No git repository found for package: org.freedesktop.Platform.VulkanInfo
Ref org.freedesktop.Platform.VulkanInfo/x86_64/21.08 failed
[0.0, 0.560336212481672, 0.0]
0.560336212481672
['org.freedesktop.Sdk.Extension.llvm12/x86_64/21.08', 'org.freedesktop.Sdk.Extension.node14/x86_64/21.08', 'org.flatpak.Builder/x86_64/stable']
Ref org.gnome.Builder/x86_64/stable used wrong dependency with probability 0.560336212481672
shame on org.gnome.Connections/x86_64/stable it does not even build
[0.8826832690858767]
0.8826832690858767
['org.flatpak.Builder/x86_64/stable']
Ref org.gnome.Taquin/x86_64/stable used wrong dependency with probability 0.8826832690858767
[0.2463452506285544]
0.2463452506285544
['org.flatpak.Builder/x86_64/stable']
Ref org.gnome.atomix/x86_64/stable used wrong dependency with probability 0.2463452506285544
[0.8688659881149288, 0.5890067952387499

In [14]:
# Bundle the three accumulators and store them on disk so the long scan above
# does not have to be repeated.
resulting_json = json5.dumps({
    'non_zero_error': non_zero_error,
    'failed': failed,
    'badly_maintained': badly_maintained,
})
with open("commit_error_result", "w") as result_file:
    result_file.write(resulting_json)

### You can skip here

In [15]:
# Load the (possibly precomputed) scan results from disk.
with open("commit_error_result", "r") as result_file:
    guessing_error = json5.loads(result_file.read())

In [18]:
# Expected number of programs rebuilt with a wrong dependency: the sum of the
# individual error probabilities.
# We subtract 1 since flatpak-builder will always report as wrong (since it is also part of the guessed deps)
error_probabilities = list(guessing_error['non_zero_error'].values())
print(np.sum(error_probabilities) - 1)

19.517012774848347


We therefore expect about 19.5 of the 1600 programs to be rebuilt with at least one dependency downgraded to the wrong version, which is reasonable.