In [1]:
import numpy as np 
import pandas as pd
import glob
import os

from matplotlib import pyplot as plt, pylab as pl
%matplotlib inline
plt.style.use("bmh")
plt.rcParams["figure.figsize"] = (12,4)
import seaborn as sns
import plotly.express as px

#loading:
from base64 import b64decode, b64encode
from gzip import decompress, compress
import json

import pyarrow.parquet as pq
import pyarrow as pa

from datetime import datetime

from scipy.stats import skew, kurtosis
from sklearn.linear_model import LinearRegression

In [2]:
# Vectors and Metrics
PT_LARGE_SYN = 'PT_LARGE_SYN'
PT_SYN = 'PT_SYN'
PT_TCP = 'PT_TCP'
PT_DNS = 'PT_DNS'
PT_DNS_RESPONSE = 'PT_DNS_RESPONSE'
PT_NTP = 'PT_NTP'
PT_SSDP = 'PT_SSDP'
PT_ICMP = 'PT_ICMP'
PT_GENERAL = 'PT_GENERAL'
PT_TOTAL = 'PT_TOTAL'
PT_UDP = 'PT_UDP'
PT_FRAG = 'PT_FRAG'
PT_NETFLOW = 'PT_NETFLOW'

PPS = 'PPS'
BW = 'BW'

vectors = [PT_LARGE_SYN, PT_SYN,PT_TCP, PT_DNS, PT_DNS_RESPONSE, PT_NTP,PT_SSDP,PT_ICMP,PT_GENERAL,PT_TOTAL,PT_UDP,PT_FRAG, PT_NETFLOW]
metrics = [PPS,BW]

In [3]:
VECTOR = 'PT_TCP'
METRIC = 'PPS'

In [4]:
def calc_features_from_ts(ts):
        '''Calculating Features from time series of IP'''
        new_row = []
        
        df_daily_max = ts.resample('D').max()
        df_daily_mean = ts.resample('D').mean()
        df_daily_median = ts.resample('D').median()
        df_daily_q90 = ts.resample('D').quantile(0.9)


        new_row.append(df_daily_max['passed_val'].quantile(0.90))

        new_row.append(df_daily_mean['passed_val'].quantile(0.90))

        new_row.append(df_daily_median['passed_val'].quantile(0.90))

        new_row.append(df_daily_q90['passed_val'].quantile(0.90))
        

        # quantile 70
        new_row.append(df_daily_max['passed_val'].quantile(0.70))

        new_row.append(df_daily_mean['passed_val'].quantile(0.70))

        new_row.append(df_daily_median['passed_val'].quantile(0.70))

        new_row.append(df_daily_q90['passed_val'].quantile(0.70))

        # top 4 maximums
        df_daily_max_sorted = df_daily_max.sort_values(by=['passed_val'], ascending=False)

        if df_daily_max_sorted['passed_val'].shape[0] > 0:
                new_row.append(df_daily_max_sorted['passed_val'][0])
        else:
                new_row.append(np.nan)
        

        if df_daily_max_sorted['passed_val'].shape[0] > 1:
                new_row.append(df_daily_max_sorted['passed_val'][1])
        else:
                new_row.append(np.nan)
        

        if df_daily_max_sorted['passed_val'].shape[0] > 2:
                new_row.append(df_daily_max_sorted['passed_val'][2])    
        else:
                new_row.append(np.nan)
        

        if df_daily_max_sorted['passed_val'].shape[0] > 3:
                new_row.append(df_daily_max_sorted['passed_val'][3])
        else:
                new_row.append(np.nan)


        return new_row

In [5]:
def extract_ts_from_dict(ts_dict):
    '''Creating the time series data to be later added to a file'''
    passed_values = []
    blocked_values = []

    for day, values in ts_dict['passedDays'].items():
        for value in values['values']:
            passed_values.append(value)

    for day, values in ts_dict['blockedDays'].items():
        for value in values['values']:
            blocked_values.append(value)

    # Some times passed_Values or blocked values are empty
    if len(passed_values) == 0:
        passed = pd.DataFrame({'passed_val':[],'passed_tmstmp':[]})
    else:    
        passed = pd.DataFrame(passed_values).rename(columns={'value': 'passed_val', 'timeStamp': 'passed_tmstmp'})
    
    if len(blocked_values) == 0:
        blocked = pd.DataFrame({'blocked_val':[],'blocked_tmstmp':[]})
    else:    
        blocked = pd.DataFrame(blocked_values).rename(columns={'value': 'blocked_val', 'timeStamp': 'blocked_tmstmp'})

    #display(blocked.blocked_val.sum())
    ts = pd.concat([passed, blocked], axis=1).drop(['blocked_tmstmp'], axis = 1)  
    ts.set_index(['passed_tmstmp'], inplace=True, drop=True, append=False, verify_integrity=False   ) 
    ts.index = pd.to_datetime(ts.index, unit = 's')
    
    ts = ts.sort_index()
    ts.fillna(0, inplace=True)
    ts['total_val'] = ts['passed_val'] + ts['blocked_val']

    return ts

In [6]:
# 

def calc_features(df, plot = False):
        
        daily_max_q90 = []
        daily_mean_q90 = []
        daily_median_q90 = []
        daily_q90_q90 = []

        daily_max_q70 = []
        daily_mean_q70 = []
        daily_median_q70 = []
        daily_q90_q70 = []

        daily_max_1 = []
        daily_max_2 = []
        daily_max_3 = []
        daily_max_4 = []



        for _, row in df.iterrows():

                pred_id = row['prediction_id']
                parquet_file = row['ts_name']

                ts_data_raw = pd.read_parquet(directory.joinpath(parquet_file + '.parquet'), engine='pyarrow').sort_index()

                        # Filter out blocked traffic:
                ts_data = ts_data_raw[~(ts_data_raw['blocked_val'] > 0)]
        
                        # A variety of resamples: 
                df_daily_max = ts_data.resample('D').max()
                df_daily_mean = ts_data.resample('D').mean()
                df_daily_median = ts_data.resample('D').median()
                df_daily_q90 = ts_data.resample('D').quantile(0.9)

                        # Calculate features:
                # quantile 90                
                daily_max_q90.append(df_daily_max['passed_val'].quantile(0.90))
                daily_mean_q90.append(df_daily_mean['passed_val'].quantile(0.90))
                daily_median_q90.append(df_daily_median['passed_val'].quantile(0.90))
                daily_q90_q90.append(df_daily_q90['passed_val'].quantile(0.90))

                # quantile 70
                daily_max_q70.append(df_daily_max['passed_val'].quantile(0.70))
                daily_mean_q70.append(df_daily_mean['passed_val'].quantile(0.70))
                daily_median_q70.append(df_daily_median['passed_val'].quantile(0.70))
                daily_q90_q70.append(df_daily_q90['passed_val'].quantile(0.70))

                # top 4 maximums
                df_daily_max_sorted = df_daily_max.sort_values(by=['passed_val'], ascending=False)
                daily_max_1.append(df_daily_max_sorted['passed_val'][0])
                daily_max_2.append(df_daily_max_sorted['passed_val'][1])
                daily_max_3.append(df_daily_max_sorted['passed_val'][2])
                daily_max_4.append(df_daily_max_sorted['passed_val'][3])

                # Plot
                if plot == True:
                        fig = go.Figure()
                        # Add raw data plot
                        fig.add_trace(go.Scatter(x=ts_data_raw.index, y=ts_data_raw['passed_val'],  mode='lines', name='ts_data_raw'))
                        # Add filtered from blocked values plot
                        fig.add_trace(go.Scatter(x=ts_data_raw.index, y=ts_data['passed_val'],  mode='markers', name='ts_data'))

                        # Add resampled data plot
                        fig.add_trace(go.Scatter(x=df_daily_max.index, y=df_daily_max['passed_val'],  mode='markers', name='daily_max'))
                        fig.add_trace(go.Scatter(x=df_daily_q90.index, y=df_daily_q90['passed_val'],  mode='markers', name='daily_q90'))

                        # plot features:
                        fig.add_trace(go.Scatter(x=ts_data_raw.index, y=[daily_max_q90]*len(ts_data_raw), mode='lines', name='daily_max_q90'))
                        fig.add_trace(go.Scatter(x=ts_data_raw.index, y=[daily_max_q70]*len(ts_data_raw), mode='lines', name='daily_max_q70'))
                        fig.add_trace(go.Scatter(x=ts_data_raw.index, y=[3*daily_max_q90]*len(ts_data_raw), mode='lines', name='3*daily_max_q90'))

                        # plot thresholds:
                        fig.add_trace(go.Scatter(x=ts_data_raw.index, y=[row['ip_ss']]*len(ts_data_raw), mode='lines', line=dict(dash='dash'), name='ip_ss'))
                        fig.show()

        df['daily_max_q90'] = daily_max_q90
        df['daily_mean_q90'] = daily_mean_q90
        df['daily_median_q90'] = daily_median_q90
        df['daily_q90_q90'] = daily_q90_q90

        df['daily_max_q70'] = daily_max_q70
        df['daily_mean_q70'] = daily_mean_q70
        df['daily_median_q70'] = daily_median_q70
        df['daily_q90_q70'] = daily_q90_q70

        df['daily_max_1'] = daily_max_1
        df['daily_max_2'] = daily_max_2
        df['daily_max_3'] = daily_max_3
        df['daily_max_4'] = daily_max_4

        return df

In [7]:
tcp_pps = pd.read_csv('./tcp_pps_v7.csv').drop(columns=['Unnamed: 0']) # 1787 rows
tcp_pps

Unnamed: 0,prediction_id,vector,metric,file,super_peaks_file,ip_ss,daily_max_q90,daily_mean_q90,daily_median_q90,daily_q90_q90,daily_max_q70,daily_mean_q70,daily_median_q70,daily_q90_q70,daily_max_1,daily_max_2,daily_max_3,daily_max_4
0,a85101f9a44c3e7fa26ad3a8f5258b1f6b780aed7e1c38...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,,12.0,0.32800,0.013707,0.012000,0.026000,0.22200,0.013174,0.011000,0.02400,0.36100,0.34400,0.33900,0.32800
1,f6ab823e547c7ec805b878d2250509cb810addb988ce57...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,,30.0,15.46400,1.438101,0.968000,2.950200,14.94100,1.233787,0.740000,2.75980,17.05900,16.32800,15.79300,15.46400
2,ac5b3676bb10fc8ae4cd494e3724053321f533b28c3a64...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...,65.0,36.82500,10.827847,10.168050,17.704560,24.30600,9.568966,8.899950,15.38814,105.93900,70.05795,61.31595,36.82500
3,3ca2d2ca5e29360360beef068d732605d4d4499209cdf3...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...,68.0,64.64205,26.075037,28.985475,44.062260,59.09805,23.979144,25.009050,41.58135,75.12705,64.87200,64.67205,64.64205
4,4446402910b942444736abdc234a69c1be8cab947550b2...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...,41.0,24.86205,5.571671,5.067000,11.071395,19.12395,5.403210,4.541475,10.58505,36.03105,29.05905,25.01400,24.86205
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1782,79296fdcd9d94f6a606db159963999bfc33cb8698ba71f...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...,45.0,38.55000,11.764190,6.783975,26.809650,30.18405,9.537494,5.131500,23.04000,98.92905,81.83805,46.54095,38.55000
1783,161e1a1669916c2486a5d2debc2f03ae43052b01c062c6...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...,32.0,19.74795,1.831723,1.050975,4.537740,6.05805,0.227902,0.031950,0.67560,32.27895,27.07005,24.79005,19.74795
1784,99b741be8b52a7ff1e704abf80d85c000b0d9d33feedc3...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...,87.0,66.82305,2.900799,0.822000,8.486760,51.13200,2.485531,0.711975,7.73400,79.20600,75.39000,67.31895,66.82305
1785,84bee7e6f334fb45dfa5b2089e06cece732e898ad7e1a2...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...,16.7,9.73395,2.926451,2.955975,4.491300,9.43005,2.748698,2.685450,4.35216,10.27800,10.22805,9.76500,9.73395


### Testing different blocked val threshold:

In [8]:

# data = pd.read_csv('./../../Itay_&_Mila_data/all_vector_metric_prediction_id-combinations/FileCatalog.csv')
# data

In [9]:
data_v2_p1 = pd.read_csv('./FileCatalog_v2_p1.csv')
data_v2_p2 = pd.read_csv('./FileCatalog_v2_p2.csv')
data = pd.concat([data_v2_p1, data_v2_p2])
data


  data_v2_p1 = pd.read_csv('./FileCatalog_v2_p1.csv')
  data_v2_p2 = pd.read_csv('./FileCatalog_v2_p2.csv')


Unnamed: 0,id,prediction_id,original_file,vector,metric,request_id,ip_ss,ip_sc,ip_nw,ip_rl,...,total_val_slope,total_val_crest_factor,total_val_shape_factor,total_val_avg_first_order_diff,missing_tmstmp_percentage,percent_of_largest_dead,percent_of_zeros,total_time,total_val_median_share,super_peaks_file
0,4722,a85101f9a44c3e7fa26ad3a8f5258b1f6b780aed7e1c38...,TimeSeriesDataB64_0_0_100.parquet,PT_DNS_RESPONSE,BW,395,2.0,4.0,6.0,9.0,...,2.181106e-10,40.211269,8.916736,2.668500e-24,0.069444,0.338,0.976257,30.0,0.000000,
1,4716,a85101f9a44c3e7fa26ad3a8f5258b1f6b780aed7e1c38...,TimeSeriesDataB64_0_0_100.parquet,PT_LARGE_SYN,BW,395,0.1,0.2,0.2,0.2,...,-2.288744e-11,123.812391,46.670364,5.023059e-24,0.069444,0.240,0.999166,30.0,0.000000,
2,4730,a85101f9a44c3e7fa26ad3a8f5258b1f6b780aed7e1c38...,TimeSeriesDataB64_0_0_100.parquet,PT_UDP,BW,395,7.0,12.0,16.0,30.0,...,4.463537e-08,162.524787,10.914687,-5.258403e-09,0.069444,0.000,0.001181,30.0,0.008846,
3,4720,a85101f9a44c3e7fa26ad3a8f5258b1f6b780aed7e1c38...,TimeSeriesDataB64_0_0_100.parquet,PT_DNS,BW,395,1.0,2.0,3.0,4.0,...,5.696970e-09,24.139679,13.466244,-4.077000e-09,0.069444,0.001,0.497081,30.0,0.000821,
4,4732,a85101f9a44c3e7fa26ad3a8f5258b1f6b780aed7e1c38...,TimeSeriesDataB64_0_0_100.parquet,PT_ICMP,BW,395,5.0,7.5,10.0,20.0,...,3.013283e-10,31.148846,2.104985,-1.482545e-09,0.069444,0.000,0.000023,30.0,0.004885,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40977,99635,a081170b6a3b5a899dc186b563488c5edd626214a9f310...,TimeSeriesDataB64_4_900_1000.parquet,PT_LARGE_SYN,PPS,4941,0.4,0.6,0.6,0.6,...,7.913066e-11,66.776758,29.192142,2.208907e-24,27.299872,0.080,0.996708,30.0,0.000000,
40978,99629,a081170b6a3b5a899dc186b563488c5edd626214a9f310...,TimeSeriesDataB64_4_900_1000.parquet,PT_ICMP,PPS,4941,5.0,7.0,9.0,15.0,...,-2.653187e-10,23.467987,5.881327,7.639066e-09,0.000000,0.000,0.240486,30.0,0.000164,
40979,99631,a081170b6a3b5a899dc186b563488c5edd626214a9f310...,TimeSeriesDataB64_4_900_1000.parquet,PT_SYN,PPS,4941,10.0,12.0,18.0,20.0,...,-9.371713e-07,71.943775,1.532923,-3.078775e-08,0.000000,,0.000000,30.0,0.054190,
40980,99641,a081170b6a3b5a899dc186b563488c5edd626214a9f310...,TimeSeriesDataB64_4_900_1000.parquet,PT_SSDP,PPS,4941,0.4,0.5,0.7,0.7,...,-9.576488e-10,37.016018,3.255654,4.185067e-22,0.000000,0.000,0.059306,30.0,0.000304,


In [10]:
# def filter_data_frame(df, **kwargs,):
#     filtered_df = df.copy()
#     for col, val in kwargs.items():
#         if isinstance(val, list):
#             filtered_df = filtered_df[filtered_df[col].isin(val)]
#         else:
#             filtered_df = filtered_df[filtered_df[col] == val]
#     return filtered_df

In [11]:
# filtered_df = filter_data_frame(data,vector=VECTOR, metric=METRIC,)
# filtered_df

In [12]:
# def get_ts_by_vec_and_metric_filtered(file_list):
#     '''Returns times series with value and time stamp'''
   
#     results = []
#     for file in file_list:
#         pred_id = file.split('_')[-1]
#         ts_hash = pd.read_parquet(file).time_series.values[0]
#         ts_dict = json.loads(decompress(b64decode(ts_hash)))
        
#     #print(ts_dict['underAttack'], vec, metric)
#         passed_values = []
#         blocked_values = []

#         for day, values in ts_dict['passedDays'].items():
#             for value in values['values']:
#                 passed_values.append(value)

#         for day, values in ts_dict['blockedDays'].items():
#             for value in values['values']:
#                 blocked_values.append(value)

#         # Some times passed_Values or blocked values are empty
#         if len(passed_values) == 0:
#             passed = pd.DataFrame({'passed_val':[],'passed_tmstmp':[]})
#         else:    
#             passed = pd.DataFrame(passed_values).rename(columns={'value': 'passed_val', 'timeStamp': 'passed_tmstmp'})
        
#         if len(blocked_values) == 0:
#             blocked = pd.DataFrame({'blocked_val':[],'blocked_tmstmp':[]})
#         else:    
#             blocked = pd.DataFrame(blocked_values).rename(columns={'value': 'blocked_val', 'timeStamp': 'blocked_tmstmp'})

#         #display(blocked.blocked_val.sum())
#         curr_res = pd.concat([passed, blocked], axis=1).drop(['blocked_tmstmp'], axis = 1)  
#         curr_res.set_index(['passed_tmstmp'], inplace=True) 
#         curr_res.index = pd.to_datetime(curr_res.index, unit = 's')
#         curr_res.sort_index()
#         curr_res.fillna(0, inplace=True)
#         curr_res['total_val'] = curr_res['passed_val'] + curr_res['blocked_val']
#         results.append((pred_id, curr_res))

#     return results

In [13]:
# def get_row_info(row):
#     res = {
#         'metric':row['metric'],
#         'vector':row['vector'],
#         'pred_id':row['prediction_id'],
#         'ip_ss':row['ip_ss'],
#         'ip_sc':row['ip_sc'],
#         'ip_nw':row['ip_nw'],
#         'ip_rl':row['ip_rl'],
#         'range_ss':row['range_ss'],
#         'range_sc':row['range_sc'],
#         'range_nw':row['range_nw'],
#         'range_rl':row['range_rl'],
#         'start_time':row['start_time'],
#         'end_time':row['end_time'],
#     }
#     return res

In [14]:
# def get_ts_by_vec_and_metric_filtered(df):
#     '''Returns times series with value and time stamp'''
#     file_list = df.file.values
#     results = []
#     for i,row in df.iterrows():
#         row_info = get_row_info(row)
#         file = row['file']
#         ts_hash = pd.read_parquet(file).time_series.values[0]
#         ts_dict = json.loads(decompress(b64decode(ts_hash)))
        
#     #print(ts_dict['underAttack'], vec, metric)
#         passed_values = []
#         blocked_values = []

#         for day, values in ts_dict['passedDays'].items():
#             for value in values['values']:
#                 passed_values.append(value)

#         for day, values in ts_dict['blockedDays'].items():
#             for value in values['values']:
#                 blocked_values.append(value)

#         # Some times passed_Values or blocked values are empty
#         if len(passed_values) == 0:
#             passed = pd.DataFrame({'passed_val':[],'passed_tmstmp':[]})
#         else:    
#             passed = pd.DataFrame(passed_values).rename(columns={'value': 'passed_val', 'timeStamp': 'passed_tmstmp'})
        
#         if len(blocked_values) == 0:
#             blocked = pd.DataFrame({'blocked_val':[],'blocked_tmstmp':[]})
#         else:    
#             blocked = pd.DataFrame(blocked_values).rename(columns={'value': 'blocked_val', 'timeStamp': 'blocked_tmstmp'})

#         #display(blocked.blocked_val.sum())
#         curr_res = pd.concat([passed, blocked], axis=1).drop(['blocked_tmstmp'], axis = 1)  
#         curr_res.set_index(['passed_tmstmp'], inplace=True) 
#         curr_res.index = pd.to_datetime(curr_res.index, unit = 's')
#         curr_res.sort_index()
#         curr_res.fillna(0, inplace=True)
#         curr_res['total_val'] = curr_res['passed_val'] + curr_res['blocked_val']

#         results.append((row_info, curr_res))

#     return results

In [15]:
# filtered_data = get_ts_by_vec_and_metric_filtered(filtered_df)
# filtered_data

In [16]:
def compute_features(df):
    def compute_column_features(col):
        mean = col.mean()
        median = col.median()
        mode = col.mode()[0]
        variance = col.var()
        std_dev = col.std()
        skewness = skew(col)
        kurt = kurtosis(col)
        iqr = col.quantile(0.75) - col.quantile(0.25)
        mad = col.mad()
        
        rms = np.sqrt(np.mean(col**2))

        
            
        median_crossing_rate = np.sum(np.diff(np.sign(col - col.median())) != 0) / (len(col) - 1)
        x = np.arange(len(col)).reshape(-1, 1)
        y = col.values.reshape(-1, 1)
        slope = LinearRegression().fit(x, y).coef_[0][0]
        max_abs_val = col.abs().max()

        # Check if the RMS value is zero
        if rms == 0:
            crest_factor = np.nan  # Assign NaN or any other appropriate value to the Crest Factor
        else:
            crest_factor = max_abs_val / rms
        
        mean_abs_val = col.abs().mean()
        if mean_abs_val == 0:
            shape_factor = np.nan  # Assign NaN or any other appropriate value to the Crest Factor
        else:
            shape_factor = rms / mean_abs_val

        avg_first_order_diff = np.mean(np.diff(col))

        return {
            'mean': mean,
            'median': median,
            'mode': mode,
            'variance': variance,
            'std_dev': std_dev,
            'skewness': skewness,
            'kurtosis': kurt,
            'iqr': iqr,
            'mad': mad,
            'rms': rms,
            'median_crossing_rate': median_crossing_rate,
            'slope': slope,
            'crest_factor': crest_factor,
            'shape_factor': shape_factor,
            'avg_first_order_diff': avg_first_order_diff
        }

    features = {
        'passed_val': compute_column_features(df['passed_val']),
        'blocked_val': compute_column_features(df['blocked_val']),
        'total_val': compute_column_features(df['total_val'])
    }

    return features


In [17]:
# def add_features_to_df(tpl, target_df):
#     # Extract the dictionary and time series DataFrame from the tuple
#     d, ts = tpl

#     # Compute the features using the updated compute_features() function
#     features = compute_features(ts)

#     # Find the index in target_df that matches the prediction_id, vector, and metric
#     index = target_df.loc[
#         (target_df['prediction_id'] == d['pred_id']) &
#         (target_df['vector'] == d['vector']) &
#         (target_df['metric'] == d['metric'])
#     ].index

#     # Check if there is a single matching row
#     if len(index) == 1:
#         # Add the computed features to the matching row in target_df
#         for col_name, col_features in features.items():
#             for feature_name, feature_value in col_features.items():
#                 target_df.at[index[0], f"{col_name}_{feature_name}"] = feature_value
#     else:
#         print(f"Error: Found {len(index)} matching rows for prediction_id={d['pred_id']}")


In [18]:
# for vec in vectors:
#     for met in metrics:
#         filtered_df = filter_data_frame(data,vector=vec, metric=met)
#         filtered_data = get_ts_by_vec_and_metric_filtered(filtered_df)
#         print('Create filtered_data for', vec, met)
#         for fd in filtered_data:
#             add_features_to_df(fd, data_cp)
#         print('added features data for', vec, met)

In [19]:
# data_cp.to_csv('./../../Itay_&_Mila_data/all_vector_metric_prediction_id-combinations/FileCatalog_improved.csv')
# data_cp.to_csv('./FileCatalog_improved.csv')

In [20]:
# data_improved = pd.read_csv('./FileCatalog_improved.csv')
# data_improved

### Recreating the parquet files

In [21]:
# # Creating the parquet files with the time series data frame
# for vec in vectors:
#     for met in metrics:
#         filtered_df = filter_data_frame(data_improved,vector=vec, metric=met)
#         print('filtered by vec and met')
#         filtered_data = get_ts_by_vec_and_metric_filtered(filtered_df)
#         print('created dic for ', vec, met)
#         for fd in filtered_data:
#             df = fd[1]
#             vec_met = vec + '_' + met
#             new_file_name = './../../Itay_&_Mila_data/all_vec_met/' +vec_met+'/' + vec_met + '_' + fd[0]['pred_id']
#             df.to_parquet(new_file_name)

### Finding Dominant IP for tcp_pps

In [22]:
filtered_df = data[data.prediction_id.isin(tcp_pps.prediction_id) & (data.metric == PPS)]
filtered_df

Unnamed: 0,id,prediction_id,original_file,vector,metric,request_id,ip_ss,ip_sc,ip_nw,ip_rl,...,total_val_slope,total_val_crest_factor,total_val_shape_factor,total_val_avg_first_order_diff,missing_tmstmp_percentage,percent_of_largest_dead,percent_of_zeros,total_time,total_val_median_share,super_peaks_file
13,4721,a85101f9a44c3e7fa26ad3a8f5258b1f6b780aed7e1c38...,TimeSeriesDataB64_0_0_100.parquet,PT_DNS_RESPONSE,PPS,395,1.5,2.0,3.5,4.0,...,-4.340234e-11,42.860486,23.423754,0.000000e+00,0.069444,0.401,0.998101,30.0,0.000000,
14,4715,a85101f9a44c3e7fa26ad3a8f5258b1f6b780aed7e1c38...,TimeSeriesDataB64_0_0_100.parquet,PT_LARGE_SYN,PPS,395,0.1,0.2,0.2,0.2,...,0.000000e+00,,,0.000000e+00,0.069444,1.000,1.000000,30.0,0.000000,
15,4729,a85101f9a44c3e7fa26ad3a8f5258b1f6b780aed7e1c38...,TimeSeriesDataB64_0_0_100.parquet,PT_UDP,PPS,395,2.0,3.5,4.5,6.0,...,7.327393e-09,78.650265,5.197817,7.956525e-21,0.069444,0.001,0.612879,30.0,0.000000,
16,4731,a85101f9a44c3e7fa26ad3a8f5258b1f6b780aed7e1c38...,TimeSeriesDataB64_0_0_100.parquet,PT_ICMP,PPS,395,1.0,1.5,2.0,5.0,...,1.228152e-09,39.228569,2.561462,4.219370e-22,0.069444,0.001,0.651031,30.0,0.000000,
17,4719,a85101f9a44c3e7fa26ad3a8f5258b1f6b780aed7e1c38...,TimeSeriesDataB64_0_0_100.parquet,PT_DNS,PPS,395,2.0,3.0,4.0,5.0,...,1.070680e-08,24.168058,17.900326,2.611991e-22,0.069444,0.033,0.993352,30.0,0.000000,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40927,99573,9db8b516e9321c4f3ad20cd9bec20ce34a7de1ebc35fe8...,TimeSeriesDataB64_4_900_1000.parquet,PT_DNS,PPS,4937,2.0,3.0,4.0,5.0,...,2.146930e-10,31.311605,3.243275,-6.250145e-09,0.000000,0.000,0.092407,30.0,0.000007,
40928,99579,9db8b516e9321c4f3ad20cd9bec20ce34a7de1ebc35fe8...,TimeSeriesDataB64_4_900_1000.parquet,PT_SYN,PPS,4937,10.0,12.0,20.0,25.0,...,7.659665e-07,16.408442,1.143162,-1.691474e-06,0.000000,,0.000000,30.0,0.012796,
40929,99589,9db8b516e9321c4f3ad20cd9bec20ce34a7de1ebc35fe8...,TimeSeriesDataB64_4_900_1000.parquet,PT_SSDP,PPS,4937,0.4,0.5,0.7,0.7,...,-3.609856e-10,35.666167,3.297041,-1.620408e-09,0.000000,0.000,0.045671,30.0,0.000007,
40930,99569,9db8b516e9321c4f3ad20cd9bec20ce34a7de1ebc35fe8...,TimeSeriesDataB64_4_900_1000.parquet,PT_TCP,PPS,4937,207.0,232.0,303.0,353.0,...,2.287444e-05,13.238448,1.117075,-9.386097e-06,0.000000,,0.000000,30.0,0.417961,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...


In [23]:
# making sure it's the same size as tcp_pps
filtered_df[filtered_df.vector == PT_TCP]

Unnamed: 0,id,prediction_id,original_file,vector,metric,request_id,ip_ss,ip_sc,ip_nw,ip_rl,...,total_val_slope,total_val_crest_factor,total_val_shape_factor,total_val_avg_first_order_diff,missing_tmstmp_percentage,percent_of_largest_dead,percent_of_zeros,total_time,total_val_median_share,super_peaks_file
19,4717,a85101f9a44c3e7fa26ad3a8f5258b1f6b780aed7e1c38...,TimeSeriesDataB64_0_0_100.parquet,PT_TCP,PPS,395,12.0,16.0,22.0,30.0,...,2.630002e-08,21.517561,1.368522,3.938011e-07,0.069444,0.000,0.002710,30.0,0.133333,
75,4773,f6ab823e547c7ec805b878d2250509cb810addb988ce57...,TimeSeriesDataB64_0_0_100.parquet,PT_TCP,PPS,399,30.0,33.0,48.0,65.0,...,-4.983186e-07,10.844292,1.680916,-4.401408e-06,0.069446,0.000,0.000649,30.0,0.246351,
129,15757,ac5b3676bb10fc8ae4cd494e3724053321f533b28c3a64...,TimeSeriesDataB64_0_9900_10000.parquet,PT_TCP,PPS,991,65.0,76.0,103.0,127.0,...,-6.077682e-05,10.181630,1.153405,-1.210012e-04,0.000000,,0.000000,30.0,0.487892,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...
143,15783,3ca2d2ca5e29360360beef068d732605d4d4499209cdf3...,TimeSeriesDataB64_0_10000_10100.parquet,PT_TCP,PPS,992,68.0,80.0,108.0,133.0,...,6.887129e-05,3.150070,1.214814,6.717285e-05,0.000000,,0.000000,30.0,0.462755,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...
167,15809,4446402910b942444736abdc234a69c1be8cab947550b2...,TimeSeriesDataB64_0_10000_10100.parquet,PT_TCP,PPS,993,41.0,49.0,66.0,83.0,...,-3.822814e-06,5.481656,1.424059,1.461221e-04,0.000000,,0.000000,30.0,0.183726,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40740,105395,79296fdcd9d94f6a606db159963999bfc33cb8698ba71f...,TimeSeriesDataB64_4_6600_6700.parquet,PT_TCP,PPS,5521,45.0,51.0,68.0,80.0,...,7.071984e-05,7.656432,1.717695,1.049330e-06,0.000000,,0.000000,30.0,0.157329,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...
40794,99413,161e1a1669916c2486a5d2debc2f03ae43052b01c062c6...,TimeSeriesDataB64_4_700_800.parquet,PT_TCP,PPS,4914,32.0,36.0,48.0,57.0,...,9.513084e-06,25.928804,3.437735,4.490845e-07,0.000000,,0.000000,30.0,0.072127,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...
40838,99465,99b741be8b52a7ff1e704abf80d85c000b0d9d33feedc3...,TimeSeriesDataB64_4_800_900.parquet,PT_TCP,PPS,4923,87.0,98.0,129.0,151.0,...,2.808362e-06,37.453768,1.834589,-3.132017e-07,0.000000,,0.000000,30.0,0.402095,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...
40884,99517,84bee7e6f334fb45dfa5b2089e06cece732e898ad7e1a2...,TimeSeriesDataB64_4_800_900.parquet,PT_TCP,PPS,4925,16.7,19.0,26.0,30.0,...,1.145592e-05,5.081980,1.156040,4.458437e-06,0.000000,0.002,0.002315,30.0,0.472412,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...


In [24]:
q_df = pd.DataFrame(columns=['vector','metric','prediction_id', 'iqr','daily_max_q90' ])

In [26]:
# Creating the parquet files with the time series data frame
c = 0
for i,row  in filtered_df.iterrows():
    ts_file = row['file']
    ts = pd.read_parquet(ts_file).sort_index()

    # my attempt to define dominant 
    q_25 = ts.passed_val.quantile(0.25)
    q_75 = ts.passed_val.quantile(0.75)
    iqr = q_75 - q_25

    # Milas attempt
    q_90 = ts.passed_val.quantile(0.9)

    ts_data = ts[~(ts['blocked_val'] > q_90*0.2)]
    ts_blocked = ts[(ts['blocked_val'] > q_90*0.2)]
    # ts_blocked.index = pd.to_datetime(ts_blocked.index)
    if isinstance(row['super_peaks_file'], str):
        sp = pd.read_csv(row['super_peaks_file'], ).sort_index()
        sp.set_index('passed_tmstmp', inplace=True)
        sp.index = pd.to_datetime(sp.index)
        ts_data = sp.loc[~sp.index.isin(ts_blocked.index)]

    # blocked_idx = ts[~(ts['blocked_val'] > 0)].index
    # ts_data = ts.loc[blocked_idx].copy()
    try:
        df_daily_max = ts_data.resample('D').max()
    except:
        print(ts_data.index)
    daily_max_q90 = df_daily_max.passed_val.quantile(0.9)
    q_df.loc[q_df.shape[0]] = [row['vector'],row['metric'],row['prediction_id'],iqr, daily_max_q90]
    c += 1
    if c%100 == 0:
        print(c)



100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
DatetimeIndex(['NaT', 'NaT', 'NaT', 'NaT', 'NaT', 'NaT', 'NaT', 'NaT', 'NaT',
               'NaT',
               ...
               'NaT', 'NaT', 'NaT', 'NaT', 'NaT', 'NaT', 'NaT', 'NaT', 'NaT',
               'NaT'],
              dtype='datetime64[ns]', name='passed_tmstmp', length=1439, freq=None)
DatetimeIndex(['NaT', 'NaT', 'NaT', 'NaT', 'NaT', 'NaT', 'NaT', 'NaT', 'NaT',
               'NaT',
               ...
               'NaT', 'NaT', 'NaT', 'NaT', 'NaT', 'NaT', 'NaT', 'NaT', 'NaT',
               'NaT'],
              dtype='datetime64[ns]', name='passed_tmstmp', length=1439, freq=None)
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600
3700
3800
3900
4000
4100
4200
4300
4400
4500
4600
4700
4800
4900
5000
5100
5200
5300
5400
5500
5600
5700
5800
5900
6000
6100
6200
6300
6400
6500
6600
6700
6800
6900
7000
7100
7200
7300
7400
7500
7600
7700
7800
7900
8000
8

In [27]:
q_df

Unnamed: 0,vector,metric,prediction_id,iqr,daily_max_q90
0,PT_DNS_RESPONSE,PPS,a85101f9a44c3e7fa26ad3a8f5258b1f6b780aed7e1c38...,0.00000,0.00000
1,PT_LARGE_SYN,PPS,a85101f9a44c3e7fa26ad3a8f5258b1f6b780aed7e1c38...,0.00000,0.00000
2,PT_UDP,PPS,a85101f9a44c3e7fa26ad3a8f5258b1f6b780aed7e1c38...,0.00100,0.41000
3,PT_ICMP,PPS,a85101f9a44c3e7fa26ad3a8f5258b1f6b780aed7e1c38...,0.00100,0.03400
4,PT_DNS,PPS,a85101f9a44c3e7fa26ad3a8f5258b1f6b780aed7e1c38...,0.00000,0.08500
...,...,...,...,...,...
18826,PT_DNS,PPS,9db8b516e9321c4f3ad20cd9bec20ce34a7de1ebc35fe8...,0.00013,0.01727
18827,PT_SYN,PPS,9db8b516e9321c4f3ad20cd9bec20ce34a7de1ebc35fe8...,0.18160,3.15000
18828,PT_SSDP,PPS,9db8b516e9321c4f3ad20cd9bec20ce34a7de1ebc35fe8...,0.00006,0.01720
18829,PT_TCP,PPS,9db8b516e9321c4f3ad20cd9bec20ce34a7de1ebc35fe8...,3.34626,177.22395


In [28]:
pt_total = q_df[q_df['vector'] == PT_TOTAL].groupby('prediction_id').sum()
pt_total.rename(columns={'daily_max_q90':'daily_max_q90_pt_total','iqr':'iqr_pt_total'}, inplace=True)
pt_total = pt_total[['iqr_pt_total','daily_max_q90_pt_total']]
# pt_total
pt_total

Unnamed: 0_level_0,iqr_pt_total,daily_max_q90_pt_total
prediction_id,Unnamed: 1_level_1,Unnamed: 2_level_1
0014b620a908408a271c669b88318efa12ef58a5d4e7546048768f53c5063b8b,2.558448,18.22307
00cf886c40fb7cd3722ba85e29dd85ae15a94b29d063e12f94e2aab0ff1ae09a,17.458070,51.08473
00e7bec3de3acce0b4374bd3aebad93011c5f4e4b61b64250e9f63c6a2d49fc5,20.050937,42.11333
00f2c9d7e8999ecea3b7ad6d7be2668eff3d1fb3c7694af98b9a083097c801b7,6.858370,18.87440
00fe4f0117dc2217338ee6a0c1dc29dce78ae995c8c17e59172b0b51b4ef02db,0.068930,25.83333
...,...,...
ff86751e01ba23e1f4a08d584579e5c262464bf4d2b8a255b0354b4770e26f45,2.376402,22.36353
ffa22788370b1ac7f54c6ece5e0e2b61dc24f1669bda81df04eb8373f279b5b9,0.510930,14.90133
ffa81ff27a5f6c2c209799ac11c8e4251ebe8f367bf39d3ee8f896f09da15de4,6.991235,58.99893
ffa889976f426a96e30a898edcb3d722584f93a5447d2cc4fd558a06a975e600,0.010000,4.88027


In [37]:
q_df_ = q_df.merge(pt_total, how='left', on='prediction_id')
q_df_ = q_df_[~(q_df_.vector == PT_TOTAL)]
q_df_

Unnamed: 0,vector,metric,prediction_id,iqr,daily_max_q90,iqr_pt_total,daily_max_q90_pt_total
0,PT_DNS_RESPONSE,PPS,a85101f9a44c3e7fa26ad3a8f5258b1f6b780aed7e1c38...,0.00000,0.00000,0.025000,0.810
1,PT_LARGE_SYN,PPS,a85101f9a44c3e7fa26ad3a8f5258b1f6b780aed7e1c38...,0.00000,0.00000,0.025000,0.810
2,PT_UDP,PPS,a85101f9a44c3e7fa26ad3a8f5258b1f6b780aed7e1c38...,0.00100,0.41000,0.025000,0.810
3,PT_ICMP,PPS,a85101f9a44c3e7fa26ad3a8f5258b1f6b780aed7e1c38...,0.00100,0.03400,0.025000,0.810
4,PT_DNS,PPS,a85101f9a44c3e7fa26ad3a8f5258b1f6b780aed7e1c38...,0.00000,0.08500,0.025000,0.810
...,...,...,...,...,...,...,...
18826,PT_DNS,PPS,9db8b516e9321c4f3ad20cd9bec20ce34a7de1ebc35fe8...,0.00013,0.01727,4.249177,125.336
18827,PT_SYN,PPS,9db8b516e9321c4f3ad20cd9bec20ce34a7de1ebc35fe8...,0.18160,3.15000,4.249177,125.336
18828,PT_SSDP,PPS,9db8b516e9321c4f3ad20cd9bec20ce34a7de1ebc35fe8...,0.00006,0.01720,4.249177,125.336
18829,PT_TCP,PPS,9db8b516e9321c4f3ad20cd9bec20ce34a7de1ebc35fe8...,3.34626,177.22395,4.249177,125.336


In [38]:

q_df_['iqr_share'] = (q_df_.iqr /q_df_.iqr_pt_total).round(2)
q_df_['daily_max_q90_share'] = (q_df_.daily_max_q90 /q_df_.daily_max_q90_pt_total).round(2)


In [39]:
q_df__ = q_df_.sort_values(by=['prediction_id', 'iqr_share'], ascending=[True, False])
q_df__

Unnamed: 0,vector,metric,prediction_id,iqr,daily_max_q90,iqr_pt_total,daily_max_q90_pt_total,iqr_share,daily_max_q90_share
9660,PT_TCP,PPS,0014b620a908408a271c669b88318efa12ef58a5d4e754...,2.59185,18.223070,2.558448,18.22307,1.01,1.00
9665,PT_SYN,PPS,0014b620a908408a271c669b88318efa12ef58a5d4e754...,0.06667,1.200000,2.558448,18.22307,0.03,0.07
9661,PT_ICMP,PPS,0014b620a908408a271c669b88318efa12ef58a5d4e754...,0.00000,0.200000,2.558448,18.22307,0.00,0.01
9662,PT_SSDP,PPS,0014b620a908408a271c669b88318efa12ef58a5d4e754...,0.00000,0.080002,2.558448,18.22307,0.00,0.00
9664,PT_UDP,PPS,0014b620a908408a271c669b88318efa12ef58a5d4e754...,0.00000,0.266670,2.558448,18.22307,0.00,0.01
...,...,...,...,...,...,...,...,...,...
5270,PT_SYN,PPS,ffbf1e27561699143a619da9406e7454b58708b897fc65...,0.06667,0.626509,6.841798,22.01060,0.01,0.03
5272,PT_ICMP,PPS,ffbf1e27561699143a619da9406e7454b58708b897fc65...,0.06666,0.442374,6.841798,22.01060,0.01,0.02
5268,PT_NTP,PPS,ffbf1e27561699143a619da9406e7454b58708b897fc65...,0.00000,0.133330,6.841798,22.01060,0.00,0.01
5269,PT_SSDP,PPS,ffbf1e27561699143a619da9406e7454b58708b897fc65...,0.00000,0.361864,6.841798,22.01060,0.00,0.02


In [40]:
q_df__[q_df__.daily_max_q90_share > 1]

Unnamed: 0,vector,metric,prediction_id,iqr,daily_max_q90,iqr_pt_total,daily_max_q90_pt_total,iqr_share,daily_max_q90_share
4205,PT_TCP,PPS,00cf886c40fb7cd3722ba85e29dd85ae15a94b29d063e1...,17.308968,63.196050,17.458070,51.08473,0.99,1.24
15850,PT_TCP,PPS,00fe4f0117dc2217338ee6a0c1dc29dce78ae995c8c17e...,0.056600,42.741000,0.068930,25.83333,0.82,1.65
5986,PT_TCP,PPS,0116fcaf3eedd0a34374c522dba92ddf11225da378566e...,12.395990,162.525000,12.659802,110.44427,0.98,1.47
194,PT_TCP,PPS,015114f0c8738c6f6aeb3cc35069a976630bf6b28691e3...,0.132270,28.789950,0.138870,12.51827,0.95,2.30
1950,PT_TCP,PPS,0152de605c2d8efdf42be0e287e5721fc7606143f3b519...,8.183720,95.566950,11.098600,74.77673,0.74,1.28
...,...,...,...,...,...,...,...,...,...
1457,PT_TCP,PPS,ff0995082e8cd3f512d21ad59adc5b18650ed1593a46f3...,10.450845,104.647950,10.680295,62.92987,0.98,1.66
4420,PT_TCP,PPS,ff2a97ac7c96e153cac7e84c788600da234ce78074d3b7...,1.119565,35.236050,1.118677,17.11767,1.00,2.06
2841,PT_TCP,PPS,ff86751e01ba23e1f4a08d584579e5c262464bf4d2b8a2...,2.261445,57.186855,2.376402,22.36353,0.95,2.56
172,PT_TCP,PPS,ffa22788370b1ac7f54c6ece5e0e2b61dc24f1669bda81...,0.488885,32.661000,0.510930,14.90133,0.96,2.19


In [41]:
q_df__['is_dominant_vector_max_daily_q90'] = (q_df__.daily_max_q90_share > 0.40) 
gb = q_df__.groupby('prediction_id').is_dominant_vector_max_daily_q90.sum()
gb = gb == 1
gb.name = 'has_dom_vec'

q_df__ = q_df__.merge(gb, how='left', on='prediction_id')
q_df__

Unnamed: 0,vector,metric,prediction_id,iqr,daily_max_q90,iqr_pt_total,daily_max_q90_pt_total,iqr_share,daily_max_q90_share,is_dominant_vector_max_daily_q90,has_dom_vec
0,PT_TCP,PPS,0014b620a908408a271c669b88318efa12ef58a5d4e754...,2.59185,18.223070,2.558448,18.22307,1.01,1.00,True,True
1,PT_SYN,PPS,0014b620a908408a271c669b88318efa12ef58a5d4e754...,0.06667,1.200000,2.558448,18.22307,0.03,0.07,False,True
2,PT_ICMP,PPS,0014b620a908408a271c669b88318efa12ef58a5d4e754...,0.00000,0.200000,2.558448,18.22307,0.00,0.01,False,True
3,PT_SSDP,PPS,0014b620a908408a271c669b88318efa12ef58a5d4e754...,0.00000,0.080002,2.558448,18.22307,0.00,0.00,False,True
4,PT_UDP,PPS,0014b620a908408a271c669b88318efa12ef58a5d4e754...,0.00000,0.266670,2.558448,18.22307,0.00,0.01,False,True
...,...,...,...,...,...,...,...,...,...,...,...
17079,PT_SYN,PPS,ffbf1e27561699143a619da9406e7454b58708b897fc65...,0.06667,0.626509,6.841798,22.01060,0.01,0.03,False,False
17080,PT_ICMP,PPS,ffbf1e27561699143a619da9406e7454b58708b897fc65...,0.06666,0.442374,6.841798,22.01060,0.01,0.02,False,False
17081,PT_NTP,PPS,ffbf1e27561699143a619da9406e7454b58708b897fc65...,0.00000,0.133330,6.841798,22.01060,0.00,0.01,False,False
17082,PT_SSDP,PPS,ffbf1e27561699143a619da9406e7454b58708b897fc65...,0.00000,0.361864,6.841798,22.01060,0.00,0.02,False,False


In [42]:
q_df__['is_dom_vec'] = q_df__.is_dominant_vector_max_daily_q90
q_df__.drop(columns=['is_dominant_vector_max_daily_q90'], inplace=True)
q_df__

Unnamed: 0,vector,metric,prediction_id,iqr,daily_max_q90,iqr_pt_total,daily_max_q90_pt_total,iqr_share,daily_max_q90_share,has_dom_vec,is_dom_vec
0,PT_TCP,PPS,0014b620a908408a271c669b88318efa12ef58a5d4e754...,2.59185,18.223070,2.558448,18.22307,1.01,1.00,True,True
1,PT_SYN,PPS,0014b620a908408a271c669b88318efa12ef58a5d4e754...,0.06667,1.200000,2.558448,18.22307,0.03,0.07,True,False
2,PT_ICMP,PPS,0014b620a908408a271c669b88318efa12ef58a5d4e754...,0.00000,0.200000,2.558448,18.22307,0.00,0.01,True,False
3,PT_SSDP,PPS,0014b620a908408a271c669b88318efa12ef58a5d4e754...,0.00000,0.080002,2.558448,18.22307,0.00,0.00,True,False
4,PT_UDP,PPS,0014b620a908408a271c669b88318efa12ef58a5d4e754...,0.00000,0.266670,2.558448,18.22307,0.00,0.01,True,False
...,...,...,...,...,...,...,...,...,...,...,...
17079,PT_SYN,PPS,ffbf1e27561699143a619da9406e7454b58708b897fc65...,0.06667,0.626509,6.841798,22.01060,0.01,0.03,False,False
17080,PT_ICMP,PPS,ffbf1e27561699143a619da9406e7454b58708b897fc65...,0.06666,0.442374,6.841798,22.01060,0.01,0.02,False,False
17081,PT_NTP,PPS,ffbf1e27561699143a619da9406e7454b58708b897fc65...,0.00000,0.133330,6.841798,22.01060,0.00,0.01,False,False
17082,PT_SSDP,PPS,ffbf1e27561699143a619da9406e7454b58708b897fc65...,0.00000,0.361864,6.841798,22.01060,0.00,0.02,False,False


In [43]:
q_df__[(q_df__.has_dom_vec) & (q_df__.vector == PT_TCP)]

Unnamed: 0,vector,metric,prediction_id,iqr,daily_max_q90,iqr_pt_total,daily_max_q90_pt_total,iqr_share,daily_max_q90_share,has_dom_vec,is_dom_vec
0,PT_TCP,PPS,0014b620a908408a271c669b88318efa12ef58a5d4e754...,2.591850,18.22307,2.558448,18.22307,1.01,1.00,True,True
7,PT_TCP,PPS,00cf886c40fb7cd3722ba85e29dd85ae15a94b29d063e1...,17.308968,63.19605,17.458070,51.08473,0.99,1.24,True,True
18,PT_TCP,PPS,00e7bec3de3acce0b4374bd3aebad93011c5f4e4b61b64...,19.884885,40.73487,20.050937,42.11333,0.99,0.97,True,True
35,PT_TCP,PPS,00fe4f0117dc2217338ee6a0c1dc29dce78ae995c8c17e...,0.056600,42.74100,0.068930,25.83333,0.82,1.65,True,True
45,PT_TCP,PPS,0116fcaf3eedd0a34374c522dba92ddf11225da378566e...,12.395990,162.52500,12.659802,110.44427,0.98,1.47,True,True
...,...,...,...,...,...,...,...,...,...,...,...
16986,PT_TCP,PPS,fec9cc58eb076f0a867addf8dc3c0dabf3fcfcca748ab5...,12.000000,69.60000,12.000000,70.40000,1.00,0.99,True,True
17002,PT_TCP,PPS,fedc348ddaf887d807e08334d9f90b7a394bb691159f91...,0.268000,93.93705,0.275330,78.21947,0.97,1.20,True,True
17012,PT_TCP,PPS,ff0995082e8cd3f512d21ad59adc5b18650ed1593a46f3...,10.450845,104.64795,10.680295,62.92987,0.98,1.66,True,True
17022,PT_TCP,PPS,ff2a97ac7c96e153cac7e84c788600da234ce78074d3b7...,1.119565,35.23605,1.118677,17.11767,1.00,2.06,True,True


In [48]:
# Creating dom by had dom vec and is dom vec 
# Creating no dom 

dom_vec_tcp = q_df__[(q_df__.has_dom_vec) & (q_df__.is_dom_vec) & (q_df__.vector == PT_TCP)].copy()
no_dom_tcp = q_df__[~((q_df__.has_dom_vec) & (q_df__.is_dom_vec)) & (q_df__.vector == PT_TCP)].copy()
dom_tcp_pred_ids = dom_vec_tcp.prediction_id
no_dom_tcp_pred_ids = no_dom_tcp.prediction_id
display(dom_vec_tcp)
no_dom_tcp

Unnamed: 0,vector,metric,prediction_id,iqr,daily_max_q90,iqr_pt_total,daily_max_q90_pt_total,iqr_share,daily_max_q90_share,has_dom_vec,is_dom_vec
0,PT_TCP,PPS,0014b620a908408a271c669b88318efa12ef58a5d4e754...,2.591850,18.22307,2.558448,18.22307,1.01,1.00,True,True
7,PT_TCP,PPS,00cf886c40fb7cd3722ba85e29dd85ae15a94b29d063e1...,17.308968,63.19605,17.458070,51.08473,0.99,1.24,True,True
18,PT_TCP,PPS,00e7bec3de3acce0b4374bd3aebad93011c5f4e4b61b64...,19.884885,40.73487,20.050937,42.11333,0.99,0.97,True,True
35,PT_TCP,PPS,00fe4f0117dc2217338ee6a0c1dc29dce78ae995c8c17e...,0.056600,42.74100,0.068930,25.83333,0.82,1.65,True,True
45,PT_TCP,PPS,0116fcaf3eedd0a34374c522dba92ddf11225da378566e...,12.395990,162.52500,12.659802,110.44427,0.98,1.47,True,True
...,...,...,...,...,...,...,...,...,...,...,...
16986,PT_TCP,PPS,fec9cc58eb076f0a867addf8dc3c0dabf3fcfcca748ab5...,12.000000,69.60000,12.000000,70.40000,1.00,0.99,True,True
17002,PT_TCP,PPS,fedc348ddaf887d807e08334d9f90b7a394bb691159f91...,0.268000,93.93705,0.275330,78.21947,0.97,1.20,True,True
17012,PT_TCP,PPS,ff0995082e8cd3f512d21ad59adc5b18650ed1593a46f3...,10.450845,104.64795,10.680295,62.92987,0.98,1.66,True,True
17022,PT_TCP,PPS,ff2a97ac7c96e153cac7e84c788600da234ce78074d3b7...,1.119565,35.23605,1.118677,17.11767,1.00,2.06,True,True


Unnamed: 0,vector,metric,prediction_id,iqr,daily_max_q90,iqr_pt_total,daily_max_q90_pt_total,iqr_share,daily_max_q90_share,has_dom_vec,is_dom_vec
26,PT_TCP,PPS,00f2c9d7e8999ecea3b7ad6d7be2668eff3d1fb3c7694a...,3.480080,18.149700,6.858370,18.87440,0.51,0.96,False,True
66,PT_TCP,PPS,0152de605c2d8efdf42be0e287e5721fc7606143f3b519...,8.183720,95.566950,11.098600,74.77673,0.74,1.28,False,True
86,PT_TCP,PPS,019954282df88587cef692b7a66b8cbb56ec0ae3c72ee7...,7.069485,26.769000,14.635937,69.63673,0.48,0.38,True,False
108,PT_TCP,PPS,01eeafb73bed8b3cd5490572b42688bcecd98bb535bb24...,4.474020,65.475000,9.987635,57.57927,0.45,1.14,False,True
119,PT_TCP,PPS,0207fd90effb2a6dd523b5aa343337b26e0a0168e6ba14...,0.514060,9.834000,0.748295,6.46740,0.69,1.52,False,True
...,...,...,...,...,...,...,...,...,...,...,...
16992,PT_TCP,PPS,feca12d5e4a2d20cd025c6bad159cdef913b90cdc7c490...,0.000730,7.567950,0.108660,3.54120,0.01,2.14,False,True
17031,PT_TCP,PPS,ff86751e01ba23e1f4a08d584579e5c262464bf4d2b8a2...,2.261445,57.186855,2.376402,22.36353,0.95,2.56,False,True
17054,PT_TCP,PPS,ffa81ff27a5f6c2c209799ac11c8e4251ebe8f367bf39d...,2.744880,95.886000,6.991235,58.99893,0.39,1.63,False,True
17067,PT_TCP,PPS,ffa889976f426a96e30a898edcb3d722584f93a5447d2c...,0.000600,2.955000,0.010000,4.88027,0.06,0.61,False,True


In [46]:
# # making sure that dom_q_df_tcp and no_dom_q_df_tcp sum to tcp_pps, or close
# q_df__[~((q_df__.has_dom_vec) & (q_df__.is_dom_vec)) & (q_df__.vector == PT_TCP)] # 936
# dom_q_df_tcp # 851
# # 851 + 936 = 1787

Unnamed: 0,vector,metric,prediction_id,iqr,daily_max_q90,iqr_pt_total,daily_max_q90_pt_total,iqr_share,daily_max_q90_share,has_dom_vec,is_dom_vec
26,PT_TCP,PPS,00f2c9d7e8999ecea3b7ad6d7be2668eff3d1fb3c7694a...,3.480080,18.149700,6.858370,18.87440,0.51,0.96,False,True
66,PT_TCP,PPS,0152de605c2d8efdf42be0e287e5721fc7606143f3b519...,8.183720,95.566950,11.098600,74.77673,0.74,1.28,False,True
86,PT_TCP,PPS,019954282df88587cef692b7a66b8cbb56ec0ae3c72ee7...,7.069485,26.769000,14.635937,69.63673,0.48,0.38,True,False
108,PT_TCP,PPS,01eeafb73bed8b3cd5490572b42688bcecd98bb535bb24...,4.474020,65.475000,9.987635,57.57927,0.45,1.14,False,True
119,PT_TCP,PPS,0207fd90effb2a6dd523b5aa343337b26e0a0168e6ba14...,0.514060,9.834000,0.748295,6.46740,0.69,1.52,False,True
...,...,...,...,...,...,...,...,...,...,...,...
16992,PT_TCP,PPS,feca12d5e4a2d20cd025c6bad159cdef913b90cdc7c490...,0.000730,7.567950,0.108660,3.54120,0.01,2.14,False,True
17031,PT_TCP,PPS,ff86751e01ba23e1f4a08d584579e5c262464bf4d2b8a2...,2.261445,57.186855,2.376402,22.36353,0.95,2.56,False,True
17054,PT_TCP,PPS,ffa81ff27a5f6c2c209799ac11c8e4251ebe8f367bf39d...,2.744880,95.886000,6.991235,58.99893,0.39,1.63,False,True
17067,PT_TCP,PPS,ffa889976f426a96e30a898edcb3d722584f93a5447d2c...,0.000600,2.955000,0.010000,4.88027,0.06,0.61,False,True


In [49]:
# tcp_pps_with_dom_vec = tcp_pps[tcp_pps.prediction_id.isin(dom_tcp_pred_ids)]
# tcp_pps_with_dom_vec


Unnamed: 0,prediction_id,vector,metric,file,super_peaks_file,ip_ss,daily_max_q90,daily_mean_q90,daily_median_q90,daily_q90_q90,daily_max_q70,daily_mean_q70,daily_median_q70,daily_q90_q70,daily_max_1,daily_max_2,daily_max_3,daily_max_4
1,f6ab823e547c7ec805b878d2250509cb810addb988ce57...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,,30.0,15.46400,1.438101,0.968000,2.950200,14.94100,1.233787,0.74000,2.759800,17.05900,16.32800,15.79300,15.46400
2,ac5b3676bb10fc8ae4cd494e3724053321f533b28c3a64...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...,65.0,36.82500,10.827847,10.168050,17.704560,24.30600,9.568966,8.89995,15.388140,105.93900,70.05795,61.31595,36.82500
3,3ca2d2ca5e29360360beef068d732605d4d4499209cdf3...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...,68.0,64.64205,26.075037,28.985475,44.062260,59.09805,23.979144,25.00905,41.581350,75.12705,64.87200,64.67205,64.64205
13,2a6260ff3417353e98bdd0fcfba4a04dcde1819051847f...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...,108.0,71.98095,3.799644,1.309500,9.737850,67.10595,3.611941,1.01700,8.835300,84.06795,77.94300,73.90200,71.98095
16,1ceb492a668f410257bcde9012879874ec5506d9ab798d...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...,38.0,25.77300,2.231182,1.996950,2.714055,20.78205,2.142454,1.94100,2.608440,27.60000,27.17505,26.03700,25.77300
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1777,a8088c3a03b7882087bf5e761e1202d863371617bd0163...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...,109.0,78.81495,7.478439,4.480500,19.453005,77.72295,5.401334,0.24000,16.155705,83.98200,80.48895,80.35305,78.81495
1778,821a4eb62e76526824a47d0d97d9528149ebb43908980b...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,,163.0,142.60000,32.273194,27.800000,53.800000,118.20000,27.985139,24.70000,45.200000,206.00000,159.40000,147.00000,142.60000
1781,131ae1afeec15cf1baea1d5de58b6bd42dce6df7d8bf2d...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...,81.0,57.16695,4.658101,3.408000,7.774440,51.50505,4.066231,3.07650,6.620250,58.53600,58.37895,57.40905,57.16695
1783,161e1a1669916c2486a5d2debc2f03ae43052b01c062c6...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...,32.0,19.74795,1.831723,1.050975,4.537740,6.05805,0.227902,0.03195,0.675600,32.27895,27.07005,24.79005,19.74795


In [122]:
# tcp_pps[tcp_pps.prediction_id.isin(no_dom_pred_ids) ].to_csv('./no_dominant_17-10_11_7_.csv')

### Determine dominant ip

In [51]:
ip_stats_df = pd.read_csv('./ip_stats.csv')
ip_stats_df

Unnamed: 0,request_id,file,prediction_id,vector,metric,type,ts_length
0,595,./../../Itay_&_Mila_data/ip_stats\IpStatsDataB...,a85101f9a44c3e7fa26ad3a8f5258b1f6b780aed7e1c38...,,BW,DST_IP,2845968
1,595,./../../Itay_&_Mila_data/ip_stats\IpStatsDataB...,a85101f9a44c3e7fa26ad3a8f5258b1f6b780aed7e1c38...,,PPS,DST_IP,2054568
2,596,./../../Itay_&_Mila_data/ip_stats\IpStatsDataB...,3915383f119c203570e1e8c0565667ddce7dc77dd8d0f2...,,BW,DST_IP,56380
3,596,./../../Itay_&_Mila_data/ip_stats\IpStatsDataB...,3915383f119c203570e1e8c0565667ddce7dc77dd8d0f2...,,PPS,DST_IP,54272
4,597,./../../Itay_&_Mila_data/ip_stats\IpStatsDataB...,5179ba85f65fdf9effc0c0a0ed3adf34e83287130bdfea...,,BW,DST_IP,55688
...,...,...,...,...,...,...,...
9095,5527,./../../Itay_&_Mila_data/ip_stats\IpStatsDataB...,f2f8fd63f48ab61a1f0bf2f547668546f8997d69a8dfd3...,,BW,DST_IP,9239476
9096,5527,./../../Itay_&_Mila_data/ip_stats\IpStatsDataB...,f2f8fd63f48ab61a1f0bf2f547668546f8997d69a8dfd3...,,PPS,DST_IP,8024096
9097,5528,./../../Itay_&_Mila_data/ip_stats\IpStatsDataB...,4f0c1ecd521cc4454b7ff8c25739646cf22b070604bc68...,,BW,DST_IP,8033568
9098,5528,./../../Itay_&_Mila_data/ip_stats\IpStatsDataB...,4f0c1ecd521cc4454b7ff8c25739646cf22b070604bc68...,,PPS,DST_IP,7033276


In [52]:
ip_stats_filtered = ip_stats_df[ip_stats_df.prediction_id.isin(dom_tcp_pred_ids) & (ip_stats_df.ts_length > 10000) & (ip_stats_df.metric == PPS)].copy()
ip_stats_filtered

Unnamed: 0,request_id,file,prediction_id,vector,metric,type,ts_length
9,599,./../../Itay_&_Mila_data/ip_stats\IpStatsDataB...,f6ab823e547c7ec805b878d2250509cb810addb988ce57...,,PPS,DST_IP,2860612
165,677,./../../Itay_&_Mila_data/ip_stats\IpStatsDataB...,198078bc0bace2310fee6f14f636d60b77d0a4ba419d00...,,PPS,DST_IP,2176076
217,703,./../../Itay_&_Mila_data/ip_stats\IpStatsDataB...,ce1899c32d39716592d47b767ec16715e6979c3f2a2f51...,,PPS,DST_IP,1801124
259,724,./../../Itay_&_Mila_data/ip_stats\IpStatsDataB...,37efed398920eb41123b1ba56b7df708668e7d2a631e22...,PT_TCP,PPS,DST_IP,2204192
261,725,./../../Itay_&_Mila_data/ip_stats\IpStatsDataB...,417e26d9207377228ad822f6ce3543fe3b3fd4d7832b1f...,PT_TCP,PPS,DST_IP,2614508
...,...,...,...,...,...,...,...
9048,5498,./../../Itay_&_Mila_data/ip_stats\IpStatsDataB...,86673bffba66765ba6ea83da26f2af4627b7e88fb61ba5...,,PPS,DST_IP,1767364
9052,5500,./../../Itay_&_Mila_data/ip_stats\IpStatsDataB...,f5f7fbd21153efefd3f9e63f81f4f28f6b365b7889caa2...,,PPS,DST_IP,2782284
9074,5516,./../../Itay_&_Mila_data/ip_stats\IpStatsDataB...,a8088c3a03b7882087bf5e761e1202d863371617bd0163...,,PPS,DST_IP,2699804
9076,5517,./../../Itay_&_Mila_data/ip_stats\IpStatsDataB...,821a4eb62e76526824a47d0d97d9528149ebb43908980b...,,PPS,DST_IP,1942380


In [53]:
tcp_pps_filtered = tcp_pps[tcp_pps.prediction_id.isin(dom_tcp_pred_ids)]
print(tcp_pps_filtered.columns)
tcp_pps_filtered

Index(['prediction_id', 'vector', 'metric', 'file', 'super_peaks_file',
       'ip_ss', 'daily_max_q90', 'daily_mean_q90', 'daily_median_q90',
       'daily_q90_q90', 'daily_max_q70', 'daily_mean_q70', 'daily_median_q70',
       'daily_q90_q70', 'daily_max_1', 'daily_max_2', 'daily_max_3',
       'daily_max_4'],
      dtype='object')


Unnamed: 0,prediction_id,vector,metric,file,super_peaks_file,ip_ss,daily_max_q90,daily_mean_q90,daily_median_q90,daily_q90_q90,daily_max_q70,daily_mean_q70,daily_median_q70,daily_q90_q70,daily_max_1,daily_max_2,daily_max_3,daily_max_4
1,f6ab823e547c7ec805b878d2250509cb810addb988ce57...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,,30.0,15.46400,1.438101,0.968000,2.950200,14.94100,1.233787,0.74000,2.759800,17.05900,16.32800,15.79300,15.46400
2,ac5b3676bb10fc8ae4cd494e3724053321f533b28c3a64...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...,65.0,36.82500,10.827847,10.168050,17.704560,24.30600,9.568966,8.89995,15.388140,105.93900,70.05795,61.31595,36.82500
3,3ca2d2ca5e29360360beef068d732605d4d4499209cdf3...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...,68.0,64.64205,26.075037,28.985475,44.062260,59.09805,23.979144,25.00905,41.581350,75.12705,64.87200,64.67205,64.64205
13,2a6260ff3417353e98bdd0fcfba4a04dcde1819051847f...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...,108.0,71.98095,3.799644,1.309500,9.737850,67.10595,3.611941,1.01700,8.835300,84.06795,77.94300,73.90200,71.98095
16,1ceb492a668f410257bcde9012879874ec5506d9ab798d...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...,38.0,25.77300,2.231182,1.996950,2.714055,20.78205,2.142454,1.94100,2.608440,27.60000,27.17505,26.03700,25.77300
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1777,a8088c3a03b7882087bf5e761e1202d863371617bd0163...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...,109.0,78.81495,7.478439,4.480500,19.453005,77.72295,5.401334,0.24000,16.155705,83.98200,80.48895,80.35305,78.81495
1778,821a4eb62e76526824a47d0d97d9528149ebb43908980b...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,,163.0,142.60000,32.273194,27.800000,53.800000,118.20000,27.985139,24.70000,45.200000,206.00000,159.40000,147.00000,142.60000
1781,131ae1afeec15cf1baea1d5de58b6bd42dce6df7d8bf2d...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...,81.0,57.16695,4.658101,3.408000,7.774440,51.50505,4.066231,3.07650,6.620250,58.53600,58.37895,57.40905,57.16695
1783,161e1a1669916c2486a5d2debc2f03ae43052b01c062c6...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...,32.0,19.74795,1.831723,1.050975,4.537740,6.05805,0.227902,0.03195,0.675600,32.27895,27.07005,24.79005,19.74795


In [56]:
dom_vec_tcp

Unnamed: 0,vector,metric,prediction_id,iqr,daily_max_q90,iqr_pt_total,daily_max_q90_pt_total,iqr_share,daily_max_q90_share,has_dom_vec,is_dom_vec
0,PT_TCP,PPS,0014b620a908408a271c669b88318efa12ef58a5d4e754...,2.591850,18.22307,2.558448,18.22307,1.01,1.00,True,True
7,PT_TCP,PPS,00cf886c40fb7cd3722ba85e29dd85ae15a94b29d063e1...,17.308968,63.19605,17.458070,51.08473,0.99,1.24,True,True
18,PT_TCP,PPS,00e7bec3de3acce0b4374bd3aebad93011c5f4e4b61b64...,19.884885,40.73487,20.050937,42.11333,0.99,0.97,True,True
35,PT_TCP,PPS,00fe4f0117dc2217338ee6a0c1dc29dce78ae995c8c17e...,0.056600,42.74100,0.068930,25.83333,0.82,1.65,True,True
45,PT_TCP,PPS,0116fcaf3eedd0a34374c522dba92ddf11225da378566e...,12.395990,162.52500,12.659802,110.44427,0.98,1.47,True,True
...,...,...,...,...,...,...,...,...,...,...,...
16986,PT_TCP,PPS,fec9cc58eb076f0a867addf8dc3c0dabf3fcfcca748ab5...,12.000000,69.60000,12.000000,70.40000,1.00,0.99,True,True
17002,PT_TCP,PPS,fedc348ddaf887d807e08334d9f90b7a394bb691159f91...,0.268000,93.93705,0.275330,78.21947,0.97,1.20,True,True
17012,PT_TCP,PPS,ff0995082e8cd3f512d21ad59adc5b18650ed1593a46f3...,10.450845,104.64795,10.680295,62.92987,0.98,1.66,True,True
17022,PT_TCP,PPS,ff2a97ac7c96e153cac7e84c788600da234ce78074d3b7...,1.119565,35.23605,1.118677,17.11767,1.00,2.06,True,True


In [59]:
tcp_pps_with_dom_vec

Unnamed: 0,prediction_id,vector,metric,file,super_peaks_file,ip_ss,daily_max_q90,daily_mean_q90,daily_median_q90,daily_q90_q90,daily_max_q70,daily_mean_q70,daily_median_q70,daily_q90_q70,daily_max_1,daily_max_2,daily_max_3,daily_max_4
1,f6ab823e547c7ec805b878d2250509cb810addb988ce57...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,,30.0,15.46400,1.438101,0.968000,2.950200,14.94100,1.233787,0.74000,2.759800,17.05900,16.32800,15.79300,15.46400
2,ac5b3676bb10fc8ae4cd494e3724053321f533b28c3a64...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...,65.0,36.82500,10.827847,10.168050,17.704560,24.30600,9.568966,8.89995,15.388140,105.93900,70.05795,61.31595,36.82500
3,3ca2d2ca5e29360360beef068d732605d4d4499209cdf3...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...,68.0,64.64205,26.075037,28.985475,44.062260,59.09805,23.979144,25.00905,41.581350,75.12705,64.87200,64.67205,64.64205
13,2a6260ff3417353e98bdd0fcfba4a04dcde1819051847f...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...,108.0,71.98095,3.799644,1.309500,9.737850,67.10595,3.611941,1.01700,8.835300,84.06795,77.94300,73.90200,71.98095
16,1ceb492a668f410257bcde9012879874ec5506d9ab798d...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...,38.0,25.77300,2.231182,1.996950,2.714055,20.78205,2.142454,1.94100,2.608440,27.60000,27.17505,26.03700,25.77300
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1777,a8088c3a03b7882087bf5e761e1202d863371617bd0163...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...,109.0,78.81495,7.478439,4.480500,19.453005,77.72295,5.401334,0.24000,16.155705,83.98200,80.48895,80.35305,78.81495
1778,821a4eb62e76526824a47d0d97d9528149ebb43908980b...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,,163.0,142.60000,32.273194,27.800000,53.800000,118.20000,27.985139,24.70000,45.200000,206.00000,159.40000,147.00000,142.60000
1781,131ae1afeec15cf1baea1d5de58b6bd42dce6df7d8bf2d...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...,81.0,57.16695,4.658101,3.408000,7.774440,51.50505,4.066231,3.07650,6.620250,58.53600,58.37895,57.40905,57.16695
1783,161e1a1669916c2486a5d2debc2f03ae43052b01c062c6...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...,32.0,19.74795,1.831723,1.050975,4.537740,6.05805,0.227902,0.03195,0.675600,32.27895,27.07005,24.79005,19.74795


In [65]:
# merging to add ip_states file
dom_vec_tcp_m_1 = dom_vec_tcp.merge(ip_stats_filtered[['file','prediction_id','metric']], on=['prediction_id','metric'], how='left')
# merging to add ts_file
dom_vec_tcp_m_2 = dom_vec_tcp_m_1.merge(tcp_pps_with_dom_vec[['super_peaks_file','file', 'prediction_id','metric']], on=['prediction_id','metric'], how='left')

dom_vec_tcp_m_2['file_ts'] = dom_vec_tcp_m_2['file_y']
dom_vec_tcp_m_2['file_ip_stats'] = dom_vec_tcp_m_2['file_x']
dom_vec_tcp_m_2 = dom_vec_tcp_m_2.drop(columns=['file_y','file_x'])
dom_vec_tcp_m_2

Unnamed: 0,vector,metric,prediction_id,iqr,daily_max_q90,iqr_pt_total,daily_max_q90_pt_total,iqr_share,daily_max_q90_share,has_dom_vec,is_dom_vec,super_peaks_file,file_ts,file_ip_stats
0,PT_TCP,PPS,0014b620a908408a271c669b88318efa12ef58a5d4e754...,2.591850,18.22307,2.558448,18.22307,1.01,1.00,True,True,,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/ip_stats\IpStatsDataB...
1,PT_TCP,PPS,00cf886c40fb7cd3722ba85e29dd85ae15a94b29d063e1...,17.308968,63.19605,17.458070,51.08473,0.99,1.24,True,True,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/ip_stats\IpStatsDataB...
2,PT_TCP,PPS,00e7bec3de3acce0b4374bd3aebad93011c5f4e4b61b64...,19.884885,40.73487,20.050937,42.11333,0.99,0.97,True,True,,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/ip_stats\IpStatsDataB...
3,PT_TCP,PPS,00fe4f0117dc2217338ee6a0c1dc29dce78ae995c8c17e...,0.056600,42.74100,0.068930,25.83333,0.82,1.65,True,True,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/ip_stats\IpStatsDataB...
4,PT_TCP,PPS,0116fcaf3eedd0a34374c522dba92ddf11225da378566e...,12.395990,162.52500,12.659802,110.44427,0.98,1.47,True,True,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/ip_stats\IpStatsDataB...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
846,PT_TCP,PPS,fec9cc58eb076f0a867addf8dc3c0dabf3fcfcca748ab5...,12.000000,69.60000,12.000000,70.40000,1.00,0.99,True,True,,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/ip_stats\IpStatsDataB...
847,PT_TCP,PPS,fedc348ddaf887d807e08334d9f90b7a394bb691159f91...,0.268000,93.93705,0.275330,78.21947,0.97,1.20,True,True,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/ip_stats\IpStatsDataB...
848,PT_TCP,PPS,ff0995082e8cd3f512d21ad59adc5b18650ed1593a46f3...,10.450845,104.64795,10.680295,62.92987,0.98,1.66,True,True,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/ip_stats\IpStatsDataB...
849,PT_TCP,PPS,ff2a97ac7c96e153cac7e84c788600da234ce78074d3b7...,1.119565,35.23605,1.118677,17.11767,1.00,2.06,True,True,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/ip_stats\IpStatsDataB...


In [66]:
def get_blocked_idx(ts):
    # ts should be from file_ts
    ts_data = ts[~(ts['blocked_val'] > q_90*0.2)]
    ts_blocked = ts[(ts['blocked_val'] > q_90*0.2)]
    ts_blocked.index = pd.to_datetime(ts_blocked.index)
    return ts_blocked.index

In [67]:
def get_ts_features(ts,blocked_idx, row):
    q_90 = ts.passed_val.quantile(0.9)

    ts_data = ts.loc[~blocked_idx]
    try:
        df_daily_max = ts_data.resample('D').max()
    except:
        df_daily_max = pd.DataFrame([0], columns=['passed_val'])
        print(row['prediction_id'])
    daily_max_q90 = df_daily_max.passed_val.quantile(0.9)
    new_row = [row['vector'],row['metric'],row['prediction_id'], daily_max_q90]
    return new_row



In [78]:
q_ip_df = pd.DataFrame(columns=['ip','vector','metric', 'prediction_id', 'daily_max_q90_pt_total','daily_max_q90_share', 'has_dom_vec','is_dom_vec','daily_max_q90_ip', 'daily_mean_q90', 'daily_median_q90',
       'daily_q90_q90', 'daily_max_q70', 'daily_mean_q70', 'daily_median_q70',
       'daily_q90_q70', 'daily_max_1', 'daily_max_2', 'daily_max_3',
       'daily_max_4'])

In [80]:
# Going over sample with dominant vec and creating features for dominant ip
all_nat_pred = []
cnt = 0
for i,row in dom_vec_tcp_m_2.iterrows():
    cnt +=1
    file_ts       = row['file_ts']
    file_ip_stats = row['file_ip_stats']

    vec = row['vector']
    metric = row['metric']
    pred_id = row['prediction_id']
    daily_max_q90_pt_total = row['daily_max_q90_pt_total']
    daily_max_q90_share = row['daily_max_q90_share']
    has_dom_vec = row['has_dom_vec']
    is_dom_vec = row['is_dom_vec']

    ts_df = pd.read_parquet(file_ts)
    
    blocked_idx = get_blocked_idx(ts_df)
    
    ip_stats_pred = pd.read_parquet(file_ip_stats)
    ts_ip_hash = ip_stats_pred[(ip_stats_pred.prediction_id == pred_id) & (ip_stats_pred.metric == PPS)].time_series.values[0]
    ts_ip_dict = json.loads(decompress(b64decode(ts_ip_hash)))
    for ip in ts_ip_dict:
        ts_ip = extract_ts_from_dict(ts_ip_dict[ip])
        
        ts_ip = ts_ip.loc[~ts_ip.index.isin(blocked_idx)]
        all_nat = ts_ip.index.isna().all()
        if all_nat:
            all_nat_pred.append(pred_id)
        else:
            ip_features = calc_features_from_ts(ts_ip)
            # ip_new_row = get_ts_features(ts_ip, row)
            ip_new_row = [ip, vec, metric, pred_id, daily_max_q90_pt_total,daily_max_q90_share, has_dom_vec, is_dom_vec, *ip_features]
            q_ip_df.loc[q_ip_df.shape[0]] = ip_new_row
      
    if cnt%100 == 0:
        print(cnt)
        
        

100
200
300
400
500
600
700
800


In [81]:
# checking all time series that have bad data
len(all_nat_pred)

235

In [82]:
q_ip_df[q_ip_df.daily_max_q90_pt_total.isna()]

Unnamed: 0,ip,vector,metric,prediction_id,daily_max_q90_pt_total,daily_max_q90_share,has_dom_vec,is_dom_vec,daily_max_q90_ip,daily_mean_q90,daily_median_q90,daily_q90_q90,daily_max_q70,daily_mean_q70,daily_median_q70,daily_q90_q70,daily_max_1,daily_max_2,daily_max_3,daily_max_4


In [211]:
# q_ip_df_cp = q_ip_df.merge(pt_total['daily_max_q90_pt_total'], on='prediction_id', how='left').copy()
# q_ip_df_cp['daily_max_q90_pt_total'] = q_ip_df_cp['daily_max_q90_pt_total_y']
# q_ip_df_cp = q_ip_df_cp.drop(columns=['daily_max_q90_pt_total_x', 'daily_max_q90_pt_total_y'])

# q_ip_df_cp

Unnamed: 0,ip,vector,metric,prediction_id,daily_max_q90_ip,daily_mean_q90,daily_median_q90,daily_q90_q90,daily_max_q70,daily_mean_q70,daily_median_q70,daily_q90_q70,daily_max_1,daily_max_2,daily_max_3,daily_max_4,daily_max_q90_pt_total
0,9e91f93a0ff3238166beefffa958a47e178a448cb96828...,PT_TCP,PPS,f6ab823e547c7ec805b878d2250509cb810addb988ce57...,2.6970,0.165554,0.137,0.31240,1.8250,0.142571,0.113,0.25820,4.486,3.350,3.283,2.697,17.061
1,339b13e25cb0736e4f40f05731618a545e09ba6bc62b64...,PT_TCP,PPS,f6ab823e547c7ec805b878d2250509cb810addb988ce57...,13.3030,0.784256,0.414,1.64800,10.9500,0.664775,0.318,1.54920,16.246,14.616,13.423,13.303,17.061
2,9d6c7bdb42ea30580955716df567394b9d663baeee37a5...,PT_TCP,PPS,f6ab823e547c7ec805b878d2250509cb810addb988ce57...,14.2520,0.619796,0.337,1.39020,11.7410,0.548309,0.289,1.26160,15.877,15.192,14.864,14.252,17.061
3,cc937b58cea1735517d13613c745ef5105b3c28da906c3...,PT_TCP,PPS,f6ab823e547c7ec805b878d2250509cb810addb988ce57...,0.1023,0.004654,0.003,0.00500,0.1009,0.004515,0.003,0.00500,0.103,0.096,,,17.061
4,99d4f9abd26617ba91f72291e8d4d476d98004b8884647...,PT_TCP,PPS,f6ab823e547c7ec805b878d2250509cb810addb988ce57...,0.0990,0.004886,0.003,0.00635,0.0990,0.004716,0.003,0.00605,0.099,0.099,,,17.061
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9665,c80ff14c8426e137053f05e9bd8a3c83eb852e52b6574f...,PT_TCP,PPS,821a4eb62e76526824a47d0d97d9528149ebb43908980b...,86.6000,1.608750,1.200,3.20000,61.2000,1.308472,0.800,2.80000,124.800,101.800,93.000,86.600,142.600
9666,39475dc8dc65e6bd30126dcc4bb1da3fdaecd03cbcda61...,PT_TCP,PPS,821a4eb62e76526824a47d0d97d9528149ebb43908980b...,32.0000,0.800694,0.400,1.80000,25.0000,0.724306,0.400,1.60000,38.000,35.400,32.800,32.000,142.600
9667,bb57358662311f9248b306d5169e49a823572a8890b903...,PT_TCP,PPS,821a4eb62e76526824a47d0d97d9528149ebb43908980b...,42.0000,3.761806,2.600,9.20000,23.4000,0.586111,0.200,1.40000,77.200,53.800,42.800,42.000,142.600
9668,a17c1fee39e04fc75efa34c06d8c5b5b869975c14e365c...,PT_TCP,PPS,821a4eb62e76526824a47d0d97d9528149ebb43908980b...,11.6000,2.327500,2.200,4.00000,8.6000,2.059028,2.000,3.60000,32.800,14.400,13.200,11.600,142.600


Unnamed: 0,ip,vector,metric,prediction_id,daily_max_q90_ip,daily_mean_q90,daily_median_q90,daily_q90_q90,daily_max_q70,daily_mean_q70,daily_median_q70,daily_q90_q70,daily_max_1,daily_max_2,daily_max_3,daily_max_4,daily_max_q90_pt_total


In [84]:
q_ip_df['daily_max_q90_share_ip'] = q_ip_df.daily_max_q90_ip / q_ip_df.daily_max_q90_pt_total
q_ip_df

Unnamed: 0,ip,vector,metric,prediction_id,daily_max_q90_pt_total,daily_max_q90_share,has_dom_vec,is_dom_vec,daily_max_q90_ip,daily_mean_q90,...,daily_q90_q90,daily_max_q70,daily_mean_q70,daily_median_q70,daily_q90_q70,daily_max_1,daily_max_2,daily_max_3,daily_max_4,daily_max_q90_share_ip
0,a0e98462bf3029da6d1a35b2551aa186bea2de2672d7e8...,PT_TCP,PPS,0014b620a908408a271c669b88318efa12ef58a5d4e754...,18.22307,1.00,True,True,0.06667,0.000370,...,0.000000,0.06667,0.000324,0.000000,0.000000,0.20000,0.13333,0.06667,0.06667,0.003659
1,ac86d62fc952cb0fac220bea7085f156bfda6213dfb553...,PT_TCP,PPS,0014b620a908408a271c669b88318efa12ef58a5d4e754...,18.22307,1.00,True,True,0.06667,0.000370,...,0.000000,0.06667,0.000253,0.000000,0.000000,0.21747,0.06667,0.06667,0.06667,0.003659
2,56cfd918d5b900049458974490e442a77122e57cd235fb...,PT_TCP,PPS,0014b620a908408a271c669b88318efa12ef58a5d4e754...,18.22307,1.00,True,True,0.20000,0.008730,...,0.066670,0.19787,0.007924,0.000000,0.066670,0.20000,0.20000,0.20000,0.20000,0.010975
3,60574c10d7e118d76326132a628f49af7e903abde8f381...,PT_TCP,PPS,0014b620a908408a271c669b88318efa12ef58a5d4e754...,18.22307,1.00,True,True,17.88433,1.720352,...,5.097970,16.51687,1.482488,0.198665,4.738344,19.16107,19.06540,18.16573,17.88433,0.981411
4,b0a98c5e8365368d9739a7fe53dcbb31db8d315649ea33...,PT_TCP,PPS,0014b620a908408a271c669b88318efa12ef58a5d4e754...,18.22307,1.00,True,True,15.40087,0.949049,...,4.115730,13.99147,0.877734,0.000000,3.651576,19.88867,18.59893,17.90633,15.40087,0.845130
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10216,87df4a69cb88a1098142712959259e3978383e8073b144...,PT_TCP,PPS,ffa22788370b1ac7f54c6ece5e0e2b61dc24f1669bda81...,14.90133,2.19,True,True,0.31327,0.047053,...,0.046007,0.30427,0.046089,0.034930,0.045070,8.82153,8.15580,5.90633,0.31327,0.021023
10217,2c007dbe8fb3d29b6d0c684975c4e5d9e91a37cade81ea...,PT_TCP,PPS,ffa22788370b1ac7f54c6ece5e0e2b61dc24f1669bda81...,14.90133,2.19,True,True,8.91440,0.012025,...,0.009330,0.07827,0.006411,0.005870,0.008737,15.12273,13.18213,9.02747,8.91440,0.598228
10218,85d1161a900901e580fb2ea069c241be4a7af411ae0135...,PT_TCP,PPS,ffa22788370b1ac7f54c6ece5e0e2b61dc24f1669bda81...,14.90133,2.19,True,True,0.31173,0.046696,...,0.045413,0.30313,0.045548,0.034670,0.044083,6.59680,5.29733,4.99760,0.31173,0.020920
10219,8c2ee633832ccd2ed3c8843e68916720e57b0341020ca7...,PT_TCP,PPS,ffa22788370b1ac7f54c6ece5e0e2b61dc24f1669bda81...,14.90133,2.19,True,True,2.71427,0.197617,...,0.518813,1.54273,0.152138,0.096500,0.349324,5.47047,3.65673,2.93180,2.71427,0.182150


In [86]:
q_ip_df['is_dom_ip'] = (q_ip_df.daily_max_q90_share_ip > 0.40) 
gb = q_ip_df.groupby('prediction_id').is_dom_ip.sum()

In [87]:
gb[gb > 1]

prediction_id
0014b620a908408a271c669b88318efa12ef58a5d4e7546048768f53c5063b8b    2
015114f0c8738c6f6aeb3cc35069a976630bf6b28691e3232457eab6057196ba    2
032e1e00112f1ddd9efd8e45e5b7937d889c6bf9449b84200f0537341dab4159    2
037839f19c31bab596a81cc6d31aa5036b1c626035da413508d7bd3e783534ef    2
04bb6456527cc2f68733f7405b075980c4f802219e24e8a9c47476874f7919d1    3
                                                                   ..
fc432a4086b23b3681f0713e0224e872ab86dc6797121b5a6a690d6e1c7c5c9d    2
fce3da8dd1e2024deecff5d12d15d940d48162f7cb7071313aa296bbce26c503    2
fec9cc58eb076f0a867addf8dc3c0dabf3fcfcca748ab5a026a0d1e649a8e280    2
fedc348ddaf887d807e08334d9f90b7a394bb691159f912e9eb7fd67f537592f    2
ffa22788370b1ac7f54c6ece5e0e2b61dc24f1669bda81df04eb8373f279b5b9    4
Name: is_dom_ip, Length: 321, dtype: int64

In [88]:
gb = gb == 1
gb.name = 'has_dom_ip'
q_ip_df = q_ip_df.merge(gb, how='left', on='prediction_id')
q_ip_df

Unnamed: 0,ip,vector,metric,prediction_id,daily_max_q90_pt_total,daily_max_q90_share,has_dom_vec,is_dom_vec,daily_max_q90_ip,daily_mean_q90,...,daily_mean_q70,daily_median_q70,daily_q90_q70,daily_max_1,daily_max_2,daily_max_3,daily_max_4,daily_max_q90_share_ip,is_dom_ip,has_dom_ip
0,a0e98462bf3029da6d1a35b2551aa186bea2de2672d7e8...,PT_TCP,PPS,0014b620a908408a271c669b88318efa12ef58a5d4e754...,18.22307,1.00,True,True,0.06667,0.000370,...,0.000324,0.000000,0.000000,0.20000,0.13333,0.06667,0.06667,0.003659,False,False
1,ac86d62fc952cb0fac220bea7085f156bfda6213dfb553...,PT_TCP,PPS,0014b620a908408a271c669b88318efa12ef58a5d4e754...,18.22307,1.00,True,True,0.06667,0.000370,...,0.000253,0.000000,0.000000,0.21747,0.06667,0.06667,0.06667,0.003659,False,False
2,56cfd918d5b900049458974490e442a77122e57cd235fb...,PT_TCP,PPS,0014b620a908408a271c669b88318efa12ef58a5d4e754...,18.22307,1.00,True,True,0.20000,0.008730,...,0.007924,0.000000,0.066670,0.20000,0.20000,0.20000,0.20000,0.010975,False,False
3,60574c10d7e118d76326132a628f49af7e903abde8f381...,PT_TCP,PPS,0014b620a908408a271c669b88318efa12ef58a5d4e754...,18.22307,1.00,True,True,17.88433,1.720352,...,1.482488,0.198665,4.738344,19.16107,19.06540,18.16573,17.88433,0.981411,True,False
4,b0a98c5e8365368d9739a7fe53dcbb31db8d315649ea33...,PT_TCP,PPS,0014b620a908408a271c669b88318efa12ef58a5d4e754...,18.22307,1.00,True,True,15.40087,0.949049,...,0.877734,0.000000,3.651576,19.88867,18.59893,17.90633,15.40087,0.845130,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10216,87df4a69cb88a1098142712959259e3978383e8073b144...,PT_TCP,PPS,ffa22788370b1ac7f54c6ece5e0e2b61dc24f1669bda81...,14.90133,2.19,True,True,0.31327,0.047053,...,0.046089,0.034930,0.045070,8.82153,8.15580,5.90633,0.31327,0.021023,False,False
10217,2c007dbe8fb3d29b6d0c684975c4e5d9e91a37cade81ea...,PT_TCP,PPS,ffa22788370b1ac7f54c6ece5e0e2b61dc24f1669bda81...,14.90133,2.19,True,True,8.91440,0.012025,...,0.006411,0.005870,0.008737,15.12273,13.18213,9.02747,8.91440,0.598228,True,False
10218,85d1161a900901e580fb2ea069c241be4a7af411ae0135...,PT_TCP,PPS,ffa22788370b1ac7f54c6ece5e0e2b61dc24f1669bda81...,14.90133,2.19,True,True,0.31173,0.046696,...,0.045548,0.034670,0.044083,6.59680,5.29733,4.99760,0.31173,0.020920,False,False
10219,8c2ee633832ccd2ed3c8843e68916720e57b0341020ca7...,PT_TCP,PPS,ffa22788370b1ac7f54c6ece5e0e2b61dc24f1669bda81...,14.90133,2.19,True,True,2.71427,0.197617,...,0.152138,0.096500,0.349324,5.47047,3.65673,2.93180,2.71427,0.182150,False,False


In [None]:
q_ip_df[(q_ip_df.is_dom_ip) & (q_ip_df.has_dom_ip)].to_csv('./dominant_v3.csv')

In [96]:
dom_pred_id_final = q_ip_df[(q_ip_df.is_dom_ip) & (q_ip_df.has_dom_ip)].prediction_id
tcp_pps[~(tcp_pps.prediction_id.isin(dom_pred_id_final))].to_csv('./no_dominant_v3.csv')

In [250]:
q_ip_df_cp

Unnamed: 0,ip,vector,metric,prediction_id,daily_max_q90_ip,daily_mean_q90,daily_median_q90,daily_q90_q90,daily_max_q70,daily_mean_q70,daily_median_q70,daily_q90_q70,daily_max_1,daily_max_2,daily_max_3,daily_max_4,daily_max_q90_pt_total,daily_max_q90_share_ip,is_dom_ip,has_dom_ip
0,9e91f93a0ff3238166beefffa958a47e178a448cb96828...,PT_TCP,PPS,f6ab823e547c7ec805b878d2250509cb810addb988ce57...,2.6970,0.165554,0.137,0.31240,1.8250,0.142571,0.113,0.25820,4.486,3.350,3.283,2.697,17.061,0.158080,False,False
1,339b13e25cb0736e4f40f05731618a545e09ba6bc62b64...,PT_TCP,PPS,f6ab823e547c7ec805b878d2250509cb810addb988ce57...,13.3030,0.784256,0.414,1.64800,10.9500,0.664775,0.318,1.54920,16.246,14.616,13.423,13.303,17.061,0.779732,True,False
2,9d6c7bdb42ea30580955716df567394b9d663baeee37a5...,PT_TCP,PPS,f6ab823e547c7ec805b878d2250509cb810addb988ce57...,14.2520,0.619796,0.337,1.39020,11.7410,0.548309,0.289,1.26160,15.877,15.192,14.864,14.252,17.061,0.835355,True,False
3,cc937b58cea1735517d13613c745ef5105b3c28da906c3...,PT_TCP,PPS,f6ab823e547c7ec805b878d2250509cb810addb988ce57...,0.1023,0.004654,0.003,0.00500,0.1009,0.004515,0.003,0.00500,0.103,0.096,,,17.061,0.005996,False,False
4,99d4f9abd26617ba91f72291e8d4d476d98004b8884647...,PT_TCP,PPS,f6ab823e547c7ec805b878d2250509cb810addb988ce57...,0.0990,0.004886,0.003,0.00635,0.0990,0.004716,0.003,0.00605,0.099,0.099,,,17.061,0.005803,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9665,c80ff14c8426e137053f05e9bd8a3c83eb852e52b6574f...,PT_TCP,PPS,821a4eb62e76526824a47d0d97d9528149ebb43908980b...,86.6000,1.608750,1.200,3.20000,61.2000,1.308472,0.800,2.80000,124.800,101.800,93.000,86.600,142.600,0.607293,True,False
9666,39475dc8dc65e6bd30126dcc4bb1da3fdaecd03cbcda61...,PT_TCP,PPS,821a4eb62e76526824a47d0d97d9528149ebb43908980b...,32.0000,0.800694,0.400,1.80000,25.0000,0.724306,0.400,1.60000,38.000,35.400,32.800,32.000,142.600,0.224404,False,False
9667,bb57358662311f9248b306d5169e49a823572a8890b903...,PT_TCP,PPS,821a4eb62e76526824a47d0d97d9528149ebb43908980b...,42.0000,3.761806,2.600,9.20000,23.4000,0.586111,0.200,1.40000,77.200,53.800,42.800,42.000,142.600,0.294530,False,False
9668,a17c1fee39e04fc75efa34c06d8c5b5b869975c14e365c...,PT_TCP,PPS,821a4eb62e76526824a47d0d97d9528149ebb43908980b...,11.6000,2.327500,2.200,4.00000,8.6000,2.059028,2.000,3.60000,32.800,14.400,13.200,11.600,142.600,0.081346,False,False


In [235]:
# merging to add daily_max_q90_share_ip daily_max_q90_pt_total, is_dom_ip, has_dom_ip
tcp_pps.merge(q_ip_df_cp[['prediction_id','vector','metric','daily_max_q90_pt_total','daily_max_q90_share_ip','is_dom_ip','has_dom_ip']], on=['prediction_id', 'vector','metric'], how='right')

Unnamed: 0,prediction_id,vector,metric,file,super_peaks_file,ip_ss,daily_max_q90,daily_mean_q90,daily_median_q90,daily_q90_q90,...,daily_median_q70,daily_q90_q70,daily_max_1,daily_max_2,daily_max_3,daily_max_4,daily_max_q90_pt_total,daily_max_q90_share_ip,is_dom_ip,has_dom_ip
0,f6ab823e547c7ec805b878d2250509cb810addb988ce57...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,,30.0,15.464,1.438101,0.968,2.9502,...,0.74,2.7598,17.059,16.328,15.793,15.464,17.061,0.158080,False,False
1,f6ab823e547c7ec805b878d2250509cb810addb988ce57...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,,30.0,15.464,1.438101,0.968,2.9502,...,0.74,2.7598,17.059,16.328,15.793,15.464,17.061,0.779732,True,False
2,f6ab823e547c7ec805b878d2250509cb810addb988ce57...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,,30.0,15.464,1.438101,0.968,2.9502,...,0.74,2.7598,17.059,16.328,15.793,15.464,17.061,0.835355,True,False
3,f6ab823e547c7ec805b878d2250509cb810addb988ce57...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,,30.0,15.464,1.438101,0.968,2.9502,...,0.74,2.7598,17.059,16.328,15.793,15.464,17.061,0.005996,False,False
4,f6ab823e547c7ec805b878d2250509cb810addb988ce57...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,,30.0,15.464,1.438101,0.968,2.9502,...,0.74,2.7598,17.059,16.328,15.793,15.464,17.061,0.005803,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9665,821a4eb62e76526824a47d0d97d9528149ebb43908980b...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,,163.0,142.600,32.273194,27.800,53.8000,...,24.70,45.2000,206.000,159.400,147.000,142.600,142.600,0.607293,True,False
9666,821a4eb62e76526824a47d0d97d9528149ebb43908980b...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,,163.0,142.600,32.273194,27.800,53.8000,...,24.70,45.2000,206.000,159.400,147.000,142.600,142.600,0.224404,False,False
9667,821a4eb62e76526824a47d0d97d9528149ebb43908980b...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,,163.0,142.600,32.273194,27.800,53.8000,...,24.70,45.2000,206.000,159.400,147.000,142.600,142.600,0.294530,False,False
9668,821a4eb62e76526824a47d0d97d9528149ebb43908980b...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,,163.0,142.600,32.273194,27.800,53.8000,...,24.70,45.2000,206.000,159.400,147.000,142.600,142.600,0.081346,False,False


In [228]:
pred_id_dom = q_ip_df_cp[(q_ip_df_cp.has_dom_ip) & (q_ip_df_cp.is_dom_ip)].prediction_id
tcp_pps[~tcp_pps.prediction_id.isin(pred_id_dom)].to_csv('./')

Unnamed: 0,prediction_id,vector,metric,file,super_peaks_file,ip_ss,daily_max_q90,daily_mean_q90,daily_median_q90,daily_q90_q90,daily_max_q70,daily_mean_q70,daily_median_q70,daily_q90_q70,daily_max_1,daily_max_2,daily_max_3,daily_max_4
0,a85101f9a44c3e7fa26ad3a8f5258b1f6b780aed7e1c38...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,,12.0,0.32800,0.013707,0.012000,0.026000,0.22200,0.013174,0.011000,0.02400,0.36100,0.34400,0.33900,0.32800
1,f6ab823e547c7ec805b878d2250509cb810addb988ce57...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,,30.0,15.46400,1.438101,0.968000,2.950200,14.94100,1.233787,0.740000,2.75980,17.05900,16.32800,15.79300,15.46400
2,ac5b3676bb10fc8ae4cd494e3724053321f533b28c3a64...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...,65.0,36.82500,10.827847,10.168050,17.704560,24.30600,9.568966,8.899950,15.38814,105.93900,70.05795,61.31595,36.82500
3,3ca2d2ca5e29360360beef068d732605d4d4499209cdf3...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...,68.0,64.64205,26.075037,28.985475,44.062260,59.09805,23.979144,25.009050,41.58135,75.12705,64.87200,64.67205,64.64205
4,4446402910b942444736abdc234a69c1be8cab947550b2...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...,41.0,24.86205,5.571671,5.067000,11.071395,19.12395,5.403210,4.541475,10.58505,36.03105,29.05905,25.01400,24.86205
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1782,79296fdcd9d94f6a606db159963999bfc33cb8698ba71f...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...,45.0,38.55000,11.764190,6.783975,26.809650,30.18405,9.537494,5.131500,23.04000,98.92905,81.83805,46.54095,38.55000
1783,161e1a1669916c2486a5d2debc2f03ae43052b01c062c6...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...,32.0,19.74795,1.831723,1.050975,4.537740,6.05805,0.227902,0.031950,0.67560,32.27895,27.07005,24.79005,19.74795
1784,99b741be8b52a7ff1e704abf80d85c000b0d9d33feedc3...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...,87.0,66.82305,2.900799,0.822000,8.486760,51.13200,2.485531,0.711975,7.73400,79.20600,75.39000,67.31895,66.82305
1785,84bee7e6f334fb45dfa5b2089e06cece732e898ad7e1a2...,PT_TCP,PPS,./../../Itay_&_Mila_data/all_vec_met/PT_TCP_PP...,./../../Itay_&_Mila_data/super_peaks/PT_TCP_PP...,16.7,9.73395,2.926451,2.955975,4.491300,9.43005,2.748698,2.685450,4.35216,10.27800,10.22805,9.76500,9.73395


In [None]:
tcp_pps

In [248]:
dom_ip_pred_ids = q_ip_df_cp[(q_ip_df_cp.has_dom_ip) & (q_ip_df_cp.is_dom_ip)].prediction_id
q_ip_df_cp[~q_ip_df_cp.prediction_id.isin(dom_ip_pred_ids)].

Unnamed: 0,ip,vector,metric,prediction_id,daily_max_q90_ip,daily_mean_q90,daily_median_q90,daily_q90_q90,daily_max_q70,daily_mean_q70,daily_median_q70,daily_q90_q70,daily_max_1,daily_max_2,daily_max_3,daily_max_4,daily_max_q90_pt_total,daily_max_q90_share_ip,is_dom_ip,has_dom_ip
0,9e91f93a0ff3238166beefffa958a47e178a448cb96828...,PT_TCP,PPS,f6ab823e547c7ec805b878d2250509cb810addb988ce57...,2.6970,0.165554,0.137,0.31240,1.8250,0.142571,0.113,0.25820,4.486,3.350,3.283,2.697,17.061,0.158080,False,False
1,339b13e25cb0736e4f40f05731618a545e09ba6bc62b64...,PT_TCP,PPS,f6ab823e547c7ec805b878d2250509cb810addb988ce57...,13.3030,0.784256,0.414,1.64800,10.9500,0.664775,0.318,1.54920,16.246,14.616,13.423,13.303,17.061,0.779732,True,False
2,9d6c7bdb42ea30580955716df567394b9d663baeee37a5...,PT_TCP,PPS,f6ab823e547c7ec805b878d2250509cb810addb988ce57...,14.2520,0.619796,0.337,1.39020,11.7410,0.548309,0.289,1.26160,15.877,15.192,14.864,14.252,17.061,0.835355,True,False
3,cc937b58cea1735517d13613c745ef5105b3c28da906c3...,PT_TCP,PPS,f6ab823e547c7ec805b878d2250509cb810addb988ce57...,0.1023,0.004654,0.003,0.00500,0.1009,0.004515,0.003,0.00500,0.103,0.096,,,17.061,0.005996,False,False
4,99d4f9abd26617ba91f72291e8d4d476d98004b8884647...,PT_TCP,PPS,f6ab823e547c7ec805b878d2250509cb810addb988ce57...,0.0990,0.004886,0.003,0.00635,0.0990,0.004716,0.003,0.00605,0.099,0.099,,,17.061,0.005803,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9665,c80ff14c8426e137053f05e9bd8a3c83eb852e52b6574f...,PT_TCP,PPS,821a4eb62e76526824a47d0d97d9528149ebb43908980b...,86.6000,1.608750,1.200,3.20000,61.2000,1.308472,0.800,2.80000,124.800,101.800,93.000,86.600,142.600,0.607293,True,False
9666,39475dc8dc65e6bd30126dcc4bb1da3fdaecd03cbcda61...,PT_TCP,PPS,821a4eb62e76526824a47d0d97d9528149ebb43908980b...,32.0000,0.800694,0.400,1.80000,25.0000,0.724306,0.400,1.60000,38.000,35.400,32.800,32.000,142.600,0.224404,False,False
9667,bb57358662311f9248b306d5169e49a823572a8890b903...,PT_TCP,PPS,821a4eb62e76526824a47d0d97d9528149ebb43908980b...,42.0000,3.761806,2.600,9.20000,23.4000,0.586111,0.200,1.40000,77.200,53.800,42.800,42.000,142.600,0.294530,False,False
9668,a17c1fee39e04fc75efa34c06d8c5b5b869975c14e365c...,PT_TCP,PPS,821a4eb62e76526824a47d0d97d9528149ebb43908980b...,11.6000,2.327500,2.200,4.00000,8.6000,2.059028,2.000,3.60000,32.800,14.400,13.200,11.600,142.600,0.081346,False,False


In [249]:
# q_ip_df_cp[(q_ip_df_cp.has_dom_ip) & (q_ip_df_cp.is_dom_ip)].to_csv('./dominant_v2.csv')
q_ip_df_cp[(q_ip_df_cp.has_dom_ip) & (q_ip_df_cp.is_dom_ip)]

Unnamed: 0,ip,vector,metric,prediction_id,daily_max_q90_ip,daily_mean_q90,daily_median_q90,daily_q90_q90,daily_max_q70,daily_mean_q70,daily_median_q70,daily_q90_q70,daily_max_1,daily_max_2,daily_max_3,daily_max_4,daily_max_q90_pt_total,daily_max_q90_share_ip,is_dom_ip,has_dom_ip
548,e4a065a58d12fe2620ba2b397fe4bcb8bacde0de3f9c2d...,PT_TCP,PPS,cdd2a3417f8cdfc7876afaf1137f8dbd722209eb4afcc8...,4.01093,0.155522,0.122100,0.252033,1.65693,0.140907,0.113900,0.227477,8.88220,7.16600,4.93700,4.01093,4.96047,0.808579,True,True
562,1c65d0558a451056d15a651cc750c43b93a03cdf10a0ef...,PT_TCP,PPS,5156ef68b7c33f8ec70d9a98a31a60e2b7db70b34f3fed...,3.44773,0.014630,0.004400,0.028007,0.66527,0.009734,0.003930,0.017537,8.08787,6.05600,3.56800,3.44773,6.54293,0.526940,True,True
584,8fe9c1f67d78f424f1b75844c614903be6233b88b890f7...,PT_TCP,PPS,9168ad178231c8525291d3d44dd92f3c7ce8667e23281d...,0.45227,0.009493,0.003200,0.018900,0.31513,0.005612,0.002730,0.008537,0.89020,0.52627,0.52187,0.45227,0.90333,0.500670,True,True
948,e43b752981a9b3ae13bd5ef7f2be0d7d41c03d94eebbdd...,PT_TCP,PPS,8048ef0795e7e37eac47cafdd3f377bc7b1e5ba166e49a...,0.31353,0.002287,0.000130,0.001670,0.03027,0.000511,0.000070,0.001200,1.33327,0.92153,0.33460,0.31353,0.31353,1.000000,True,True
956,19e45d96ed1759477a2c2efcb8e1a7bb22d60a1440ed56...,PT_TCP,PPS,b41543e775042b3b5fdf3d2a7d6ee793088f66c43853c7...,3.10647,0.006824,0.001330,0.003130,0.01233,0.001506,0.001270,0.002400,3.55340,3.36020,3.20373,3.10647,2.20867,1.406489,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9481,1651baef2a820286ea37384c730a012173fb48db4b92d5...,PT_TCP,PPS,d8a38111147f89108a1403b22710002701d04459357f35...,4.47947,0.100590,0.008130,0.238800,3.82907,0.073823,0.005065,0.211310,6.58413,6.31573,5.55280,4.47947,6.20147,0.722324,True,True
9512,7413cf41ff3ccfe602d325f9add49a8a1ed248d755d302...,PT_TCP,PPS,753de690e31e11be6776bcb41063f309116fd32c06928a...,1.50607,0.002152,0.000270,0.001137,0.00933,0.000490,0.000000,0.000730,2.41540,1.74753,1.55973,1.50607,2.29453,0.656374,True,True
9570,2c678cc5dee3ce54d650a073844ef73f17e882f53411a1...,PT_TCP,PPS,f3af87607c60b29e9d1057adf4fbf7b91b3230f6e5a4ce...,42.43800,0.292039,0.233470,0.333137,3.51967,0.248104,0.216900,0.302830,51.49640,45.76513,43.06253,42.43800,91.34000,0.464616,True,True
9606,f9c6da218df250d48c4ac8efbdd61e34495f5677f8eab5...,PT_TCP,PPS,328ecab19f0436cdf4288198ff12317e09c1306edb805e...,17.80460,0.014809,0.000000,0.000000,16.11800,0.013741,0.000000,0.000000,18.83053,17.94240,17.81367,17.80460,20.56647,0.865710,True,True


In [223]:
# q_ip_df_cp[(q_ip_df_cp.has_dom_ip) & (q_ip_df_cp.is_dom_ip) & (q_ip_df_cp.daily_max_q90_share_ip > 1)]
q_ip_df_cp[(q_ip_df_cp.daily_max_q90_share_ip > 1)]

Unnamed: 0,ip,vector,metric,prediction_id,daily_max_q90_ip,daily_mean_q90,daily_median_q90,daily_q90_q90,daily_max_q70,daily_mean_q70,daily_median_q70,daily_q90_q70,daily_max_1,daily_max_2,daily_max_3,daily_max_4,daily_max_q90_pt_total,daily_max_q90_share_ip,is_dom_ip,has_dom_ip
89,7733da272f5fc0bfffa6d89f4664b7f8f755c9cb2e473c...,PT_TCP,PPS,2db1917702fc0d921c74e8eb52b830349f3f4ecc1cb883...,1.1093,0.000775,0.0,0.0,0.2099,0.000152,0.0,0.0,1.559,0.06,0.014,0.009,0.608,1.824507,True,False
300,7497bc2a711ff877fd1c5649b12f14909aec82b0d17d3e...,PT_TCP,PPS,66e15242fd9ed497cec4857a11acd14abb1bdf4a667630...,0.973,0.000682,0.0,0.0,0.973,0.000682,0.0,0.0,0.973,,,,0.519,1.874759,True,False
760,798553b14ecc693cba182a76249c44302cb4e625ff9510...,PT_TCP,PPS,601eba6c5eca8674a611f656808033ceaf78a4efa36306...,0.8838,0.000614,0.0,0.0,0.6894,0.000479,0.0,0.0,0.981,0.009,,,0.365,2.42137,True,False
895,9ac311005a91d0e051c17d779e25c20a2d208b2bf5addb...,PT_TCP,PPS,f8355bb1e9678d09e365db6f4dbd8beef2129f8085150d...,1.588,0.001108,0.0,0.0,0.798,0.000565,0.0,0.0,1.983,0.008,0.001,,0.517,3.071567,True,False
956,19e45d96ed1759477a2c2efcb8e1a7bb22d60a1440ed56...,PT_TCP,PPS,b41543e775042b3b5fdf3d2a7d6ee793088f66c43853c7...,3.10647,0.006824,0.00133,0.00313,0.01233,0.001506,0.00127,0.0024,3.5534,3.3602,3.20373,3.10647,2.20867,1.406489,True,True
1137,9a80f32da727b6fa58f3735a93e4dfcad3c261c8cbd7c3...,PT_TCP,PPS,efb52a5fcd9a219e99dc618fcef9be479dc4b5703971f9...,1.9788,0.00801,0.00013,0.00027,1.68987,0.002377,7e-05,0.0002,2.2398,2.13447,2.0644,1.9788,1.77387,1.115527,True,False
1609,2df5dc4a3a22e350f6a3e7ee144c418bdab5fcc66fe4d5...,PT_TCP,PPS,37efed398920eb41123b1ba56b7df708668e7d2a631e22...,20.7645,0.033788,0.0,0.006,16.7475,0.033372,0.0,0.006,22.773,16.078,5.713,4.63,17.357,1.196318,True,False
2483,442dcb49ef05547eef8e39a907424a9f33ff876a9f38c1...,PT_TCP,PPS,88a6d881deb17fcbce2fcf0f94703778abda8d881ba921...,0.643,0.000447,0.0,0.0,0.327,0.000229,0.0,0.0,0.801,0.011,0.009,,0.474,1.35654,True,False
2676,5ceb4288c5242ef934c1727d74c6424c992ecb5f32eeca...,PT_TCP,PPS,7c32642ceba9e1e8d08d9c4bfde84bb6a8cf47e4d116e9...,0.4644,0.000323,0.0,0.0,0.3632,0.000252,0.0,0.0,0.515,0.009,,,0.415,1.119036,True,False
2694,cc4c6af199986fcd94f3063b1d61eee725c4644ac4213d...,PT_TCP,PPS,7c32642ceba9e1e8d08d9c4bfde84bb6a8cf47e4d116e9...,0.4729,0.000339,0.0,0.0,0.3707,0.000266,0.0,0.0,0.524,0.013,,,0.415,1.139518,True,False
