In [None]:
# imports/config
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle

plt.rcParams['figure.figsize'] = [20, 12]

In [None]:
# plot as histogram and boxplot
def plot(data_set: pd.DataFrame, title: str, out: str):
    """Plot the isolated and unisolated samples as two histograms on shared bins.

    Both columns are sorted, the lowest and highest 5% of the samples are
    dropped as outliers, and the remaining values are drawn in blue (isolated)
    and red (unisolated). The figure is saved to ``out`` and then shown.

    Args:
        data_set: Frame providing the ``browser_isolated`` and
            ``browser_unisolated`` columns. It is not modified.
        title: Text appended to the figure title.
        out: Path handed to ``plt.savefig``.

    Raises:
        AssertionError: If the two columns differ in length.
        ValueError: If no samples are left to plot (NumPy cannot take the
            min/max of an empty array).
    """
    # np.sort returns a sorted copy. Sorting the to_numpy() result in place may
    # reorder the caller's DataFrame column, or fail on a read-only array.
    isolated = np.sort(data_set['browser_isolated'].to_numpy())
    unisolated = np.sort(data_set['browser_unisolated'].to_numpy())

    assert len(isolated) == len(unisolated)
    data_len = len(isolated)

    # throw away outliers (the lower and upper 5%)
    trim = int(data_len * 0.05)

    # Explicit end index: ``values[trim:-trim]`` would be empty when trim == 0.
    isolated = isolated[trim:data_len - trim]
    unisolated = unisolated[trim:data_len - trim]

    lowest = np.min([isolated.min(), unisolated.min()])
    highest = np.max([isolated.max(), unisolated.max()])

    bins = np.arange(round(lowest, 2) - 0.1, round(highest, 2) + 0.2, step=0.005)

    # x-axis ticks every 0.1 (the axis is labelled in ms)
    ticks = np.arange(round(lowest, 0) - 0.1, round(highest, 0) + 0.2, step=0.1)

    # plot as histogram
    plt.hist(x=isolated, color='blue', alpha=1, rwidth=0.6, bins=bins)
    plt.hist(x=unisolated, color='red', alpha=1, rwidth=0.6, bins=bins)

    # Hand-built legend entries: one coloured rectangle per series.
    handles = [Rectangle((0, 0), 1, 1, color=c, ec="k") for c in ["blue", "red"]]
    labels = ["Isolated", "Unisolated"]
    plt.legend(handles, labels)

    plt.grid(axis='y', alpha=0.5)
    plt.title(f'Timestamp samples (target: 100ms) - {title}')
    plt.ylabel('Number of Occurrences')
    plt.xlabel('Distance to last Timestamp (ms)')
    plt.xticks(ticks, fontsize=14)
    plt.tight_layout()
    plt.savefig(out, bbox_inches="tight", pad_inches=0.3)
    plt.show()

# plot boxplot
def boxplot(data_set: pd.DataFrame, out: str):
    """Draw a box plot of the first eight columns of ``data_set`` and save it.

    Means are marked and outlier fliers are hidden. The figure is written to
    ``out`` and then shown.

    Args:
        data_set: Frame whose first eight columns (by position) are plotted.
        out: Path handed to ``plt.savefig``.
    """
    selected = data_set.columns.to_list()[0:8]
    axes = data_set[selected].plot(
        kind='box',
        title='boxplot',
        showmeans=True,
        showfliers=False,
    )

    # Tilt the column names and anchor them on their right edge.
    axes.tick_params(axis='x', rotation=30, labelsize=14)
    plt.setp(axes.get_xticklabels(), horizontalalignment='right')
    plt.tight_layout()

    text_size = dict(fontsize=18)
    plt.title("Browser timestamp precision (target: 100ms)", **text_size)
    # NOTE(review): the histogram helpers label the same kind of data in ms;
    # confirm that seconds is the intended unit here.
    plt.ylabel("Difference in sec", **text_size)
    plt.xlabel("Datapoints", **text_size)
    plt.tight_layout()

    plt.savefig(out, bbox_inches="tight", pad_inches=0.3)
    plt.show()

In [None]:
# plot tor data as histogram
def plot_tor(data_set, title: str, out):
    """Plot the ``browser_unisolated`` samples of the Tor data as a histogram.

    The first 10 rows are ignored; the remaining values are sorted and the 10
    smallest and 25 largest are dropped before plotting. The figure is saved
    to ``out`` and then shown.

    Args:
        data_set: Frame providing the ``browser_unisolated`` column. It is not
            modified.
        title: Text appended to the figure title.
        out: Path handed to ``plt.savefig``.

    Raises:
        ValueError: If no samples are left after trimming (NumPy cannot take
            the min/max of an empty array).
    """
    # np.sort returns a sorted copy. Sorting the to_numpy() slice in place may
    # reorder the caller's DataFrame column, or fail on a read-only array.
    unisolated = np.sort(data_set['browser_unisolated'].to_numpy()[10:])

    # Fixed-count outlier trim: 10 values from the low end, 25 from the high end.
    unisolated = unisolated[10:-25]
    lowest = unisolated.min()
    highest = unisolated.max()

    bins = np.arange(round(lowest, 2) - 5, round(highest, 2) + 5, step=0.1)
    ticks = np.arange(round(lowest, 2) - 5, round(highest, 2) + 5, step=2)

    # plot as histogram
    plt.hist(x=unisolated, color='red', alpha=1, rwidth=0.6, bins=bins)

    plt.grid(axis='y', alpha=0.5)
    # Plain ``{title}`` placeholder: a leading ``$`` would be printed literally.
    plt.title(f'Timestamp samples (target: 100ms) - {title}')
    plt.ylabel('Number of Occurrences')
    plt.xlabel('Distance to last Timestamp (ms)')
    plt.xticks(ticks, fontsize=14)
    plt.tight_layout()
    plt.savefig(out, bbox_inches="tight", pad_inches=0.3)
    plt.show()

def boxplot_tor(data_set: pd.DataFrame, out: str):
    """Draw a box plot of columns 4 to 7 (by position) of ``data_set`` and save it.

    Means are marked and outlier fliers are hidden. The figure is written to
    ``out`` and then shown.

    Args:
        data_set: Frame whose columns at positions 4-7 are plotted.
        out: Path handed to ``plt.savefig``.
    """
    selected = data_set.columns.to_list()[4:8]
    axes = data_set[selected].plot(
        kind='box',
        title='boxplot',
        showmeans=True,
        showfliers=False,
    )

    axes.tick_params(axis='x', labelsize=14)
    plt.setp(axes.get_xticklabels(), horizontalalignment='right')
    plt.tight_layout()

    text_size = dict(fontsize=18)
    plt.title("Browser timestamp precision (target: 100ms)", **text_size)
    plt.ylabel("Distance to last Timestamp (ms)", **text_size)
    plt.xlabel("Datapoints", **text_size)
    plt.tight_layout()

    plt.savefig(out, bbox_inches="tight", pad_inches=0.3)
    plt.show()

In [None]:
# plot firefox configed as histogram and boxplot
def plot_firefox_configed(data_set: pd.DataFrame, data_set_configed: pd.DataFrame, out: str):
    """Plot default and resistFingerprinting Firefox samples as four histograms.

    Each of the four columns is sorted and has its lowest and highest 5%
    dropped as outliers; the trim count is derived from the length of
    ``data_set_configed``. All series share one set of bins. The figure is
    saved to ``out`` and then shown.

    Args:
        data_set: Frame providing ``browser_isolated`` and
            ``browser_unisolated``. It is not modified.
        data_set_configed: Frame providing ``isolated_resist`` and
            ``unisolated_resist``. It is not modified.
        out: Path handed to ``plt.savefig``.

    Raises:
        AssertionError: If the four trimmed series differ in length.
        ValueError: If a series is empty after trimming (NumPy cannot take the
            min/max of an empty array).
    """
    # throw away outliers (the lower and upper 5%)
    data_len = len(data_set_configed['isolated_resist'])
    trim = int(data_len * 0.05)

    def trimmed(column: pd.Series) -> np.ndarray:
        # np.sort works on a copy, so the caller's DataFrame keeps its row order.
        ordered = np.sort(column.to_numpy())
        # Explicit end index: ``ordered[trim:-trim]`` would be empty when trim == 0.
        return ordered[trim:len(ordered) - trim]

    isolated = trimmed(data_set['browser_isolated'])
    unisolated = trimmed(data_set['browser_unisolated'])
    isolated_resist = trimmed(data_set_configed['isolated_resist'])
    unisolated_resist = trimmed(data_set_configed['unisolated_resist'])

    series = (isolated, unisolated, isolated_resist, unisolated_resist)
    lowest = np.min([values.min() for values in series])
    highest = np.max([values.max() for values in series])

    assert len(isolated) == len(unisolated) == len(isolated_resist) == len(unisolated_resist)

    bins = np.arange(round(lowest, 0) - 1, round(highest, 2) + 1, step=0.1)
    ticks = np.arange(round(lowest, 0) - 1, round(highest, 0) + 1, step=1)

    # plot as histogram
    plt.hist(x=isolated, color='green', alpha=0.6, rwidth=0.5, bins=bins, label="Isolated")
    plt.hist(x=unisolated, color='yellow', alpha=0.6, rwidth=0.5, bins=bins, label="Unisolated")
    # NOTE(review): rwidth is a fraction of the bin width, so 3 draws bars
    # three bins wide — confirm this is intended.
    plt.hist(x=isolated_resist, color='blue', alpha=0.6, rwidth=3, bins=bins,
             label="Isolated resistFingerprinting")
    plt.hist(x=unisolated_resist, color='red', alpha=0.6, rwidth=1, bins=bins,
             label="Unisolated resistFingerprinting")

    plt.grid(axis='y', alpha=0.5)
    plt.title('Timestamp samples (target: 100ms)')
    plt.ylabel('Number of Occurrences')
    plt.xlabel('Distance to last Timestamp (ms)')
    plt.xticks(ticks, fontsize=14)
    plt.tight_layout()
    plt.legend()
    plt.savefig(out, bbox_inches="tight", pad_inches=0.3)
    plt.show()

# plot boxplot
def boxplot_firefox_configed(data_set: pd.DataFrame, out: str):
    """Draw a box plot of the first eight columns of ``data_set``, fliers included.

    Means are marked and outlier fliers are shown. The figure is written to
    ``out`` and then shown.

    Args:
        data_set: Frame whose first eight columns (by position) are plotted.
        out: Path handed to ``plt.savefig``.
    """
    selected = data_set.columns.to_list()[0:8]
    axes = data_set[selected].plot(
        kind='box',
        title='boxplot',
        showmeans=True,
        showfliers=True,
    )

    # Tilt the column names and anchor them on their right edge.
    axes.tick_params(axis='x', rotation=30, labelsize=14)
    plt.setp(axes.get_xticklabels(), horizontalalignment='right')
    plt.tight_layout()

    text_size = dict(fontsize=18)
    plt.title("Browser timestamp precision (target: 100ms)", **text_size)
    # NOTE(review): the histogram helpers label the same kind of data in ms;
    # confirm that seconds is the intended unit here.
    plt.ylabel("Difference in sec", **text_size)
    plt.xlabel("Datapoints", **text_size)
    plt.tight_layout()

    plt.savefig(out, bbox_inches="tight", pad_inches=0.3)
    plt.show()

In [None]:
def reject_outliers(data, lower=.01, upper=.025):
    """Return ``data`` sorted ascending with a fraction trimmed from each end.

    Args:
        data: Array-like of numbers. It is left unmodified; a sorted copy is
            used instead.
        lower: Fraction of the samples dropped from the low end.
        upper: Fraction of the samples dropped from the high end.

    Returns:
        A new sorted NumPy array without the ``int(len(data) * lower)``
        smallest and ``int(len(data) * upper)`` largest values.
    """
    ordered = np.sort(data)
    size = len(ordered)
    drop_low = int(size * lower)
    drop_high = int(size * upper)
    # Explicit end index: ``ordered[drop_low:-0]`` would be empty when nothing
    # is to be dropped from the high end.
    return ordered[drop_low:size - drop_high]

In [None]:
# calc
def calc(data: pd.DataFrame, has_isolated = True, isolated_col = 'browser_isolated', unisolated_col = 'browser_unisolated',
         lower=.01, upper=.025, filter: bool = True):
    """Print summary statistics for the isolated and unisolated sample columns.

    For each reported column the values are rounded to 3 decimals and,
    when ``filter`` is set, trimmed with ``reject_outliers``. The distinct
    values are printed first, followed by min, max, mean and standard
    deviation (one decimal each) and the number of distinct values.

    Args:
        data: Frame holding the sample columns.
        has_isolated: When true, ``isolated_col`` is reported before
            ``unisolated_col``; otherwise only ``unisolated_col`` is reported.
        isolated_col: Name of the isolated column.
        unisolated_col: Name of the unisolated column.
        lower: Low-end fraction forwarded to ``reject_outliers``.
        upper: High-end fraction forwarded to ``reject_outliers``.
        filter: Whether to trim outliers before computing the statistics.
    """
    def report(column, label, blank_line_after):
        # Work on a fresh array built from the column's values.
        values = np.asarray(data[column].tolist()).round(3)
        if filter:
            values = reject_outliers(values, lower=lower, upper=upper)

        figures = (
            ("min", round(values.min(), 1)),
            ("max", round(values.max(), 1)),
            ("mean", round(values.mean(), 1)),
            ("std", round(values.std(), 1)),
        )
        distinct = np.unique(values)

        print(distinct)
        print(label)
        # Captions are padded to 18 characters so the colons line up.
        for caption, number in figures:
            print(f"{label + ' ' + caption:<18}: {number:.1f}")
        print(f"{label + ' uniques':<18}: {len(distinct)}")
        if blank_line_after:
            print("")

    if has_isolated:
        report(isolated_col, "Isolated", True)
    report(unisolated_col, "Unisolated", False)

In [None]:
# chromium: load the samples (file lines 1-10, 0-indexed, are skipped),
# print the statistics and save the box plot
chromium = pd.read_csv("./chromium/complete.csv", skiprows=list(range(1, 11)))
calc(chromium)
boxplot(chromium, out='./chromium/box.jpeg')

In [None]:
# chrome: load the samples (file lines 1-10, 0-indexed, are skipped),
# print the statistics and save the box plot
chrome = pd.read_csv("./chrome/complete.csv", skiprows=list(range(1, 11)))
calc(chrome)
boxplot(chrome, out='./chrome/box.jpeg')

In [None]:
# edge: load the samples (file lines 1-10, 0-indexed, are skipped),
# print the statistics and save the box plot
edge = pd.read_csv("./edge/complete.csv", skiprows=list(range(1, 11)))
calc(edge)
boxplot(edge, out='./edge/box.jpeg')

In [None]:
# firefox: load the samples (file lines 1-10, 0-indexed, are skipped),
# print the statistics and save the box plot
firefox = pd.read_csv("./firefox/complete.csv", skiprows=list(range(1, 11)))
calc(firefox)
boxplot(firefox, out='./firefox/box.jpeg')

In [None]:
# tor: load the samples (file lines 1-10, 0-indexed, are skipped) and
# print the statistics of the unisolated column only
tor = pd.read_csv("./tor/complete.csv", skiprows=list(range(1, 11)))
calc(tor, has_isolated=False)

In [None]:
# safari: load the samples (file lines 1-10, 0-indexed, are skipped) and
# print the statistics with a 1% trim at the high end
safari = pd.read_csv("./safari/complete.csv", skiprows=list(range(1, 11)))
calc(safari, upper=0.01)

In [None]:
# safari2: load the samples (file lines 1-10, 0-indexed, are skipped) and
# print the statistics with a 1% trim at the high end
safari2 = pd.read_csv("./safari2/complete.csv", skiprows=list(range(1, 11)))
calc(safari2, upper=0.01)

In [None]:
# firefox mac
# Bound to its own name so the Safari frame loaded earlier is not overwritten.
# NOTE(review): only file lines 1-9 are skipped here, while the other loads in
# this notebook skip lines 1-10 — confirm which is intended.
firefox_mac = pd.read_csv("./firefox_mac/complete.csv", skiprows=[1,2,3,4,5,6,7,8,9])

In [None]:
# firefox NJ: load the samples (file lines 1-10, 0-indexed, are skipped) and
# print unfiltered statistics for both columns
firefox_NJ = pd.read_csv("./firefox_NJ/complete.csv", skiprows=list(range(1, 11)))
calc(
    firefox_NJ,
    isolated_col='browser_isolated',
    unisolated_col='browser_unisolated',
    filter=False,
)

In [None]:
# firefox rf: unfiltered statistics for every unisolated_resist* column
# The CSV is read once (file lines 1-10, 0-indexed, are skipped) and shared by
# all calls; calc() builds its own arrays from the column values, so the frame
# is not altered between iterations.
firefox_RF = pd.read_csv("./firefox_rf/complete.csv", skiprows=list(range(1, 11)))

firefox_rf_columns = ['unisolated_resist'] + [
    f'unisolated_resist_{interval}ms'
    for interval in (20, 33, 40, 60, 80, 100, 120, 140, 160, 180, 200)
]

# NOTE(review): has_isolated is False throughout, so only the unisolated
# columns are summarised — confirm that the isolated_resist* columns are
# intentionally left out.
for column in firefox_rf_columns:
    # Label each summary, since all of them now share one cell output.
    print(f"== {column} ==")
    calc(firefox_RF, False, unisolated_col=column, filter=False)
    print("")

In [None]:
# firefox rf NJ: unfiltered statistics of the unisolated column only
# (file lines 1-10, 0-indexed, are skipped on load)
# NOTE(review): `firefox` is reloaded here although this cell does not use it
# afterwards — confirm whether the reload is still needed.
firefox = pd.read_csv("./firefox/complete.csv", skiprows=list(range(1, 11)))
firefox_RF_NJ = pd.read_csv("./firefox_rf_NJ/complete.csv", skiprows=list(range(1, 11)))
calc(
    firefox_RF_NJ,
    has_isolated=False,
    unisolated_col='unisolated_resist_NJ',
    filter=False,
)

In [None]:
# firefox rf 20ms NJ: unfiltered statistics of the unisolated column only
# (file lines 1-10, 0-indexed, are skipped on load)
firefox = pd.read_csv("./firefox/complete.csv", skiprows=list(range(1, 11)))
firefox_RF_20_NJ = pd.read_csv("./firefox_rf_NJ/complete.csv", skiprows=list(range(1, 11)))
calc(
    firefox_RF_20_NJ,
    has_isolated=False,
    unisolated_col='unisolated_resist_20ms_NJ',
    filter=False,
)

In [None]:
# firefox rf 33ms NJ: unfiltered statistics of the unisolated column only
# (file lines 1-10, 0-indexed, are skipped on load)
firefox = pd.read_csv("./firefox/complete.csv", skiprows=list(range(1, 11)))
firefox_RF_33_NJ = pd.read_csv("./firefox_rf_NJ/complete.csv", skiprows=list(range(1, 11)))
calc(
    firefox_RF_33_NJ,
    has_isolated=False,
    unisolated_col='unisolated_resist_33ms_NJ',
    filter=False,
)

In [None]:
# firefox rf 40ms NJ: unfiltered statistics of the unisolated column only
# (file lines 1-10, 0-indexed, are skipped on load)
firefox = pd.read_csv("./firefox/complete.csv", skiprows=list(range(1, 11)))
firefox_RF_40_NJ = pd.read_csv("./firefox_rf_NJ/complete.csv", skiprows=list(range(1, 11)))
calc(
    firefox_RF_40_NJ,
    has_isolated=False,
    unisolated_col='unisolated_resist_40ms_NJ',
    filter=False,
)

In [None]:
# firefox rf 60ms NJ: unfiltered statistics of the unisolated column only
# (file lines 1-10, 0-indexed, are skipped on load)
firefox = pd.read_csv("./firefox/complete.csv", skiprows=list(range(1, 11)))
firefox_RF_60_NJ = pd.read_csv("./firefox_rf_NJ/complete.csv", skiprows=list(range(1, 11)))
calc(
    firefox_RF_60_NJ,
    has_isolated=False,
    unisolated_col='unisolated_resist_60ms_NJ',
    filter=False,
)

In [None]:
# firefox rf 80ms NJ: unfiltered statistics of the unisolated column only
# (file lines 1-10, 0-indexed, are skipped on load)
firefox = pd.read_csv("./firefox/complete.csv", skiprows=list(range(1, 11)))
firefox_RF_80_NJ = pd.read_csv("./firefox_rf_NJ/complete.csv", skiprows=list(range(1, 11)))
calc(
    firefox_RF_80_NJ,
    has_isolated=False,
    unisolated_col='unisolated_resist_80ms_NJ',
    filter=False,
)

In [None]:
# firefox rf 100ms NJ: unfiltered statistics of the unisolated column only
# (file lines 1-10, 0-indexed, are skipped on load)
firefox = pd.read_csv("./firefox/complete.csv", skiprows=list(range(1, 11)))
firefox_RF_100_NJ = pd.read_csv("./firefox_rf_NJ/complete.csv", skiprows=list(range(1, 11)))
calc(
    firefox_RF_100_NJ,
    has_isolated=False,
    unisolated_col='unisolated_resist_100ms_NJ',
    filter=False,
)

In [None]:
# firefox rf 120ms NJ: unfiltered statistics of the unisolated column only
# (file lines 1-10, 0-indexed, are skipped on load)
firefox = pd.read_csv("./firefox/complete.csv", skiprows=list(range(1, 11)))
firefox_RF_120_NJ = pd.read_csv("./firefox_rf_NJ/complete.csv", skiprows=list(range(1, 11)))
calc(
    firefox_RF_120_NJ,
    has_isolated=False,
    unisolated_col='unisolated_resist_120ms_NJ',
    filter=False,
)

In [None]:
# firefox rf 140ms NJ: unfiltered statistics of the unisolated column only
# (file lines 1-10, 0-indexed, are skipped on load)
firefox = pd.read_csv("./firefox/complete.csv", skiprows=list(range(1, 11)))
firefox_RF_140_NJ = pd.read_csv("./firefox_rf_NJ/complete.csv", skiprows=list(range(1, 11)))
calc(
    firefox_RF_140_NJ,
    has_isolated=False,
    unisolated_col='unisolated_resist_140ms_NJ',
    filter=False,
)

In [None]:
# firefox rf 160ms NJ: unfiltered statistics of the unisolated column only
# (file lines 1-10, 0-indexed, are skipped on load)
firefox = pd.read_csv("./firefox/complete.csv", skiprows=list(range(1, 11)))
firefox_RF_160_NJ = pd.read_csv("./firefox_rf_NJ/complete.csv", skiprows=list(range(1, 11)))
calc(
    firefox_RF_160_NJ,
    has_isolated=False,
    unisolated_col='unisolated_resist_160ms_NJ',
    filter=False,
)

In [None]:
# firefox rf 180ms NJ: unfiltered statistics of the unisolated column only
# (file lines 1-10, 0-indexed, are skipped on load)
firefox = pd.read_csv("./firefox/complete.csv", skiprows=list(range(1, 11)))
firefox_RF_180_NJ = pd.read_csv("./firefox_rf_NJ/complete.csv", skiprows=list(range(1, 11)))
calc(
    firefox_RF_180_NJ,
    has_isolated=False,
    unisolated_col='unisolated_resist_180ms_NJ',
    filter=False,
)

In [None]:
# firefox rf 200ms NJ: unfiltered statistics of the unisolated column only
# (file lines 1-10, 0-indexed, are skipped on load)
firefox = pd.read_csv("./firefox/complete.csv", skiprows=list(range(1, 11)))
firefox_RF_200_NJ = pd.read_csv("./firefox_rf_NJ/complete.csv", skiprows=list(range(1, 11)))
calc(
    firefox_RF_200_NJ,
    has_isolated=False,
    unisolated_col='unisolated_resist_200ms_NJ',
    filter=False,
)