In [1]:
import hashlib
import os

import numpy as np
import pandas as pd

In [2]:
def make_random_file(name=None, size=1024):
    """Generate `size` random bytes and optionally persist them to disk.

    Args:
        name: Destination path (`str` or `os.PathLike`). When it is anything
            else (e.g. the default `None`) nothing is written.
        size: Number of random bytes to draw from `os.urandom`.

    Returns:
        The generated bytes, whether or not they were also written to `name`.
    """
    data = os.urandom(size)
    # Accept path objects too: a str-only check silently skips the write for
    # pathlib.Path arguments.
    if isinstance(name, (str, os.PathLike)):
        with open(name, "wb") as fout:
            fout.write(data)
    return data


def encrypt_file(name, key):
    """XOR every byte of the file at `name` with `key`, rewriting it in place.

    Applying the function twice with the same key restores the original
    contents. A single-byte XOR is an obfuscation, not real encryption.

    Args:
        name: Path of the file to transform; it is read fully into memory.
        key: Integer in range(0, 256) XOR-ed into each byte.

    Raises:
        ValueError: If `key` is outside range(0, 256). The check happens
            before the file is touched, so a bad key never modifies it.
    """
    if not 0 <= key <= 255:
        raise ValueError(f"key must be in range(0, 256), got {key!r}")

    # Precompute the XOR of every possible byte value once; bytes.translate
    # then applies the mapping in C instead of looping per byte in Python.
    table = bytes(value ^ key for value in range(256))

    with open(name, "rb") as fin:
        data = fin.read()
    with open(name, "wb") as fout:
        fout.write(data.translate(table))


def get_hash(data, istart=0, ilen=32):
    """Return the MD5 hex digest of the window data[istart : istart + ilen].

    Args:
        data: Bytes-like object to hash a slice of.
        istart: Offset of the first byte in the window.
        ilen: Window length; `None` means "from istart to the end of data".

    Returns:
        The hexadecimal MD5 digest of the selected slice. Normal slicing rules
        apply, so a window running past the end of `data` is simply shorter.
    """
    window_len = len(data) - istart if ilen is None else ilen
    lo = int(istart)
    hi = int(istart + window_len)
    digest = hashlib.md5(data[lo:hi])
    return digest.hexdigest()

In [3]:
class Miner:
    """In-memory storage node that holds files and answers hash challenges."""

    def __init__(self, uid, files=None):
        """Create a miner, optionally preloaded with files.

        Args:
            uid: Identifier of this miner.
            files: Optional dict mapping file_id -> file contents. Any
                non-dict value (including the default `None`) is ignored.
        """
        self.uid = uid
        # file_id -> stored file contents
        self.files = {}

        if isinstance(files, dict):
            for file_id, file in files.items():
                self.add_file(file, file_id)

    def __repr__(self):
        # Kept short (count only, no file ids) so a list of miners stays
        # readable when displayed as a notebook cell output.
        return f"Miner(uid={self.uid!r}, n_files={self.n_files})"

    @property
    def n_files(self):
        """Number of files currently stored by this miner."""
        return len(self.files)

    def add_file(self, file, file_id):
        """Store `file` under `file_id`, replacing any previous entry."""
        self.files[file_id] = file

    def forward(self, file_id, istart, ilen):
        """Answer a challenge by hashing a window of a stored file.

        Args:
            file_id: Identifier of the file to hash.
            istart: Offset of the window, passed through to `get_hash`.
            ilen: Length of the window, passed through to `get_hash`.

        Returns:
            Whatever `get_hash` returns for the stored file and window.

        Raises:
            ValueError: If no file is stored under `file_id`.
        """
        stored_file = self.files.get(file_id)
        if stored_file is None:
            raise ValueError(f"File {file_id} not found")

        return get_hash(stored_file, istart, ilen)

In [None]:
# One validator with many miners

miners = [Miner(uid=i) for i in range(10)]

n_files = 100  # number of random files to generate
n_hashes = 100  # number of offset-window hashes recorded per file
ilen = 32  # length in bytes of each offset window
redundancy = 3  # number of distinct miners that store each file
hashes = []
files = {}
file_sizes = {}
for _ in range(n_files):
    # make a random file id (could be a hash of the file)
    file_id = os.urandom(8).hex()

    file_size_bytes = np.random.randint(1024, 1024**2)
    data = make_random_file(size=file_size_bytes)

    # select `redundancy` distinct miners to store the file
    selected_uids = np.random.choice(range(len(miners)), size=redundancy, replace=False)

    for uid in selected_uids:
        miners[uid].add_file(data, file_id)

    files[file_id] = data
    file_sizes[file_id] = file_size_bytes

    # randint's upper bound is exclusive, so the +1 is needed for the last
    # full window (starting at len(data) - ilen) to be a possible draw.
    ibeg = np.random.randint(0, len(data) - ilen + 1, size=n_hashes)

    # Full file hash
    hashes.append(
        {
            "file_id": file_id,
            "file_size": file_size_bytes,
            "istart": 0,
            "ilen": len(data),
            "hash": get_hash(data, istart=0, ilen=len(data)),
            "uids": selected_uids,
        }
    )
    # offset hashes
    hashes.extend(
        {
            "file_id": file_id,
            "file_size": file_size_bytes,
            "istart": ii,
            "ilen": ilen,
            "hash": get_hash(data, istart=ii, ilen=ilen),
            "uids": selected_uids,
        }
        for ii in ibeg
    )

# score for a given miner is the sum of bytes that it can prove it is storing
miner_scores = {
    miner.uid: sum(file_sizes[file_id] for file_id in miner.files)
    for miner in miners
}

# miner_scores is keyed by uid, so look scores up by uid rather than by the
# miner's position in the list.
for miner in miners:
    print(
        f"Miner with uid {miner.uid} has {miner.n_files} files "
        f"and score {miner_scores[miner.uid]:,}"
    )

df_hashes = pd.DataFrame(hashes)
df_hashes

In [None]:
# Remove (and display) the miner at list position 2 -- uid 2 on a fresh run,
# because the list is built in uid order.
# NOTE: not idempotent. Each re-run of this cell removes another miner, and the
# list positions after the removed one no longer equal the miners' uids.
miners.pop(2)

In [None]:
# Display the miners that remain in the pool.
miners

In [None]:
# Challenge miner to reproduce hashes

def challenge(file_id):
    """Challenge one miner assigned `file_id` to reproduce a recorded hash.

    A random recorded entry for the file is picked from the global `hashes`
    list, then a random uid from that entry's `uids`. The matching miner from
    the global `miners` list is asked to hash the same window.

    Args:
        file_id: Identifier of the file to challenge.

    Returns:
        True if the miner's hash equals the recorded one. False if it differs
        or if the chosen miner is no longer present in `miners`.

    Raises:
        ValueError: If no hash was recorded for `file_id`.
    """
    entries = [entry for entry in hashes if entry["file_id"] == file_id]
    if not entries:
        raise ValueError(f"File {file_id} not found")

    # grab one entry
    entry = entries[np.random.randint(len(entries))]
    print(entry)

    # select a miner that was assigned the file
    uid = np.random.choice(entry["uids"])

    # Look the miner up by uid, not by list position: once a miner has been
    # removed from `miners`, positions no longer line up with uids.
    miner = next((m for m in miners if m.uid == uid), None)
    if miner is None:
        # The assigned miner is gone, so it cannot answer the challenge.
        return False

    miner_hash = miner.forward(file_id, entry["istart"], entry["ilen"])

    return miner_hash == entry["hash"]


# File ids are random hex strings, so challenge one that was actually generated
# (the first key of `files`) instead of a made-up id.
challenge(next(iter(files)))

In [None]:
"""
Questions:
"""