In [None]:
import itertools
from functools import partial
from typing import List, Iterable, Tuple

import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter

from generator.sequence import generate_random_sequence
from simulator.loss import ge_loss, ar1_loss
from simulator.permutation import generate_permutation, apply_permutation
from simulator.simulator import Policy
from synchronizer.greedy import find_losses
from utils.consume import consume, consume_all

`find_losses` tries to recover the loss indices, i.e. the positions in the original array whose elements were lost, using longest subsequence matching.

In [None]:
# Toy example: `received` is `original` with two elements missing. The trailing
# 2 is gone for certain; the other loss is one of the two adjacent 2s at
# indices 3 and 4, so its exact position is ambiguous from the data alone.
original = [1, 2, 3, 2, 2, 1, 2]
received = [1, 2, 3, 2, 1]
# Last expression of the cell: the result of find_losses is shown as output.
find_losses(original, received)

In [None]:
def run_loss_finder(sequence: Iterable[int],
                    length: int,
                    policies: List[Policy]):
    """Simulate one lossy transmission and run the loss finder on it.

    The first ``length`` items of ``sequence`` are taken as the sent data, a
    permutation is generated from ``policies`` and applied to that data, and
    ``find_losses`` is run twice:

    * on the index sequence ``0 .. length-1`` against the permutation itself,
      which serves as the reference result, and
    * on the sent data against the permuted (received) data.

    Args:
        sequence: Source of the values to send.
        length: Number of values taken from ``sequence``.
        policies: Policies passed on to ``generate_permutation``.

    Returns:
        Tuple ``(expected, actual)`` holding the two ``find_losses`` results.
    """
    sent = consume(sequence, length=length)
    # presumably the indices of the elements that survive transmission;
    # verify against generate_permutation
    kept = consume_all(generate_permutation(policies, length))
    arrived = apply_permutation(sent, kept)

    all_indices = consume_all(range(length))
    return find_losses(all_indices, kept), find_losses(sent, arrived)

In [None]:
def test_loss_finder(sequence_lengths: List[int] = None,
                    loss_probabilities: List[float] = None,
                    chunk_sizes: List[int] = None,
                    runs: int = 1) -> pd.DataFrame:
    """Evaluate the loss finder over a grid of simulation parameters.

    For every run and every combination of sequence length, chunk size and
    loss probability, a sequence is generated with
    ``generate_random_sequence``, a loss policy is built with
    ``ge_loss_with_prob_goodbad`` and both are handed to ``run_loss_finder``.
    One result row is recorded per simulation.

    Args:
        sequence_lengths: Sequence lengths to simulate.
        loss_probabilities: Values forwarded to ``ge_loss_with_prob_goodbad``.
        chunk_sizes: Values forwarded to ``generate_random_sequence``.
        runs: Number of times the whole parameter grid is repeated.

    A grid left at ``None`` is treated as empty: no simulation is run and an
    empty frame with the usual columns is returned.

    Returns:
        DataFrame with the columns ``sequence_length``, ``chunk_size``,
        ``loss_rate`` (number of expected loss indices divided by the
        sequence length) and ``distance_error`` (sum of absolute differences
        between pairwise matched expected and found loss indices).
    """
    columns = ['sequence_length', 'chunk_size', 'loss_rate', 'distance_error']

    # Materialise each grid once: None becomes an empty axis instead of
    # crashing itertools.product, and one-shot iterables are not exhausted
    # after the first run when runs > 1.
    lengths = [] if sequence_lengths is None else list(sequence_lengths)
    chunks = [] if chunk_sizes is None else list(chunk_sizes)
    probs = [] if loss_probabilities is None else list(loss_probabilities)
    grid = list(itertools.product(lengths, chunks, probs))

    results = []
    for _ in range(runs):
        for length, chunk_size, prob in grid:
            loss_policy = ge_loss_with_prob_goodbad(prob)
            sequence = generate_random_sequence(chunk_size)
            expected, actual = run_loss_finder(sequence, length, [loss_policy])
            # Loss rate actually realised in this run, not the configured one.
            posterior_loss_rate = len(expected) / length
            # NOTE(review): zip stops at the shorter list, so this assumes
            # both find_losses results have the same length - confirm.
            distance_error = sum(abs(e - a) for e, a in zip(expected, actual))
            results.append((length, chunk_size, posterior_loss_rate, distance_error))
    return pd.DataFrame(results, columns=columns)

def ge_loss_with_prob_goodbad(prob: float,
                              prob_badgood: float = 0.8,
                              prob_loss_good: float = 0.2,
                              prob_transmit_bad: float = 0.25):
    """Build a ``ge_loss`` policy with ``prob_goodbad`` set to ``prob``.

    The other ``ge_loss`` keyword arguments default to the values this
    notebook has always used, so existing single-argument calls behave as
    before; they can now be overridden per call.

    Args:
        prob: Bound to ``ge_loss``'s ``prob_goodbad`` argument.
        prob_badgood: Bound to ``prob_badgood`` (default 0.8).
        prob_loss_good: Bound to ``prob_loss_good`` (default 0.2).
        prob_transmit_bad: Bound to ``prob_transmit_bad`` (default 0.25).

    Returns:
        A ``functools.partial`` of ``ge_loss`` with the four keyword
        arguments above bound.
    """
    return partial(
        ge_loss,
        prob_goodbad=prob,
        prob_badgood=prob_badgood,
        prob_loss_good=prob_loss_good,
        prob_transmit_bad=prob_transmit_bad,
    )

Given different bits per packet and posterior loss rates, what percentage of index sets is predicted correctly by the loss finder?

In [None]:
# Run the simulation grid; every parameter combination is repeated 250 times.
results_df = test_loss_finder(
    sequence_lengths=[10, 20, 30, 40, 50],
    # NOTE(review): 0.001 breaks the doubling pattern of its neighbours -
    # possibly meant to be 0.01; confirm before re-running.
    loss_probabilities=[0.0025, 0.005, 0.001, 0.02],
    chunk_sizes=[1, 2, 3, 4],
    runs=250,
)

# Bucket the realised loss rate; labels are built from consecutive bin edges.
loss_bins = [0, 0.02, 0.05, 0.1, 0.15, 0.2]
bin_labels = [f'{100*a}-{100*b}%' for a, b in zip(loss_bins, loss_bins[1:])]
results_df['loss_bin'] = pd.cut(results_df['loss_rate'], bins=loss_bins, labels=bin_labels)
# A run counts as correct only when the cumulative index distance is zero.
results_df['correct'] = results_df['distance_error'].eq(0)

# Render floats as percentages (this display option stays set until changed).
pd.set_option('display.float_format', '{:.1f}%'.format)
(
    results_df
    .groupby(['chunk_size', 'loss_bin'])
    .agg({'correct': ['mean']})
    .mul(100)
    .unstack(1)
)

Unnamed: 0_level_0,correct,correct,correct,correct
Unnamed: 0_level_1,mean,mean,mean,mean
loss_bin,2.0-5.0%,5.0-10.0%,10.0-15.0%,15.0-20.0%
chunk_size,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3
1,45.5%,32.7%,5.7%,6.3%
2,68.4%,60.4%,30.2%,23.8%
3,89.7%,77.5%,53.2%,47.0%
4,93.3%,89.5%,74.1%,68.8%


What is the mean cumulative distance error for the indices?

For example, if `[10,23,35]` are the actual loss indices and `[11,20,35]` were predicted, the cumulative distance error is |10-11| + |23-20| + |35-35| = 4.

In [None]:
# Switch the float display back to plain two-decimal numbers for this table.
pd.set_option('display.float_format', '{:.2f}'.format)
# Mean cumulative distance error per chunk size (rows) and loss bin (columns).
(
    results_df
    .groupby(['chunk_size', 'loss_bin'])
    .agg({'distance_error': ['mean']})
    .unstack(1)
)

Unnamed: 0_level_0,distance_error,distance_error,distance_error,distance_error
Unnamed: 0_level_1,mean,mean,mean,mean
loss_bin,2.0-5.0%,5.0-10.0%,10.0-15.0%,15.0-20.0%
chunk_size,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3
1,1.38,2.9,10.18,16.44
2,0.45,0.75,2.11,3.94
3,0.13,0.3,0.79,1.54
4,0.08,0.13,0.36,0.71


## TODO

- Refine approach
    - Split synchronizer and estimator
    - Incorporate reordering
    - Estimate the error, or at least detect when the streams are not synchronized
- Define envelope where it would be nice to work
- Run tests for that envelope
- Plot Loss vs. accuracy