In [None]:
import numpy as np
import pandas as pd

In [None]:
from gwpy.table import GravitySpyTable

In [None]:
# Read the O3b Gravity Spy glitch CSVs of both detectors (H1, then L1)
# into a single table. The paths are plain literals: nothing is interpolated.
O3_data = GravitySpyTable.read(
    [
        './datasets/GlitchesO3/H1_O3b.csv',
        './datasets/GlitchesO3/L1_O3b.csv',
    ]
)

In [None]:
# Sanity check: the combined table must contain every row of both
# catalogues (192693 + 99568 rows in total).
assert 192693 + 99568 == len(O3_data)

In [None]:
# Minimum classifier confidence a glitch needs to be kept (0 keeps everything).
ml_confidence = 0

# Build one pandas frame per detector with the same confidence cut applied.
H1_confident, L1_confident = (
    O3_data.filter(f"ifo=={ifo}", f"ml_confidence>={ml_confidence}").to_pandas()
    for ifo in ("H1", "L1")
)

# Show how many glitches survive the cut in each detector.
len(H1_confident), len(L1_confident)

(99568, 192693)

In [None]:
# Survey how the confidence cut trades sample count against label coverage.
# The loop variable is intentionally NOT named `ml_confidence`: reusing that
# name would silently overwrite the notebook-level threshold that was used to
# build H1_confident / L1_confident, leaving stale state for later cells.
for confidence_cut in (0, 0.5, 0.75, 0.9, 0.99, 0.999, 0.9999, 0.99999):
    temp_H1 = O3_data.filter("ifo==H1", f"ml_confidence>={confidence_cut}").to_pandas()
    temp_L1 = O3_data.filter("ifo==L1", f"ml_confidence>={confidence_cut}").to_pandas()
    # "classes" is the smaller of the two detectors' distinct-label counts.
    n_labels = min(len(np.unique(temp_H1['ml_label'])), len(np.unique(temp_L1['ml_label'])))
    print(f"conf level: {confidence_cut:<7}, H1 samples: {len(temp_H1):>6}, L1 samples: {len(temp_L1):>6}, classes: {n_labels}")

conf level: 0      , H1 samples:  99568, L1 samples: 192693, classes: 23
conf level: 0.5    , H1 samples:  98440, L1 samples: 185110, classes: 23
conf level: 0.75   , H1 samples:  90532, L1 samples: 159831, classes: 23
conf level: 0.9    , H1 samples:  82324, L1 samples: 137938, classes: 23
conf level: 0.99   , H1 samples:  60201, L1 samples:  99198, classes: 21
conf level: 0.999  , H1 samples:  31060, L1 samples:  64733, classes: 18
conf level: 0.9999 , H1 samples:   8368, L1 samples:  38933, classes: 14
conf level: 0.99999, H1 samples:   2481, L1 samples:  23039, classes: 13


In [None]:
# Alphabetically ordered list of every distinct label present in the table.
labels = list(np.unique(O3_data['ml_label']))
labels.sort()
labels

['1080Lines',
 '1400Ripples',
 'Air_Compressor',
 'Blip',
 'Blip_Low_Frequency',
 'Chirp',
 'Extremely_Loud',
 'Fast_Scattering',
 'Helix',
 'Koi_Fish',
 'Light_Modulation',
 'Low_Frequency_Burst',
 'Low_Frequency_Lines',
 'No_Glitch',
 'Paired_Doves',
 'Power_Line',
 'Repeating_Blips',
 'Scattered_Light',
 'Scratchy',
 'Tomte',
 'Violin_Mode',
 'Wandering_Line',
 'Whistle']

In [None]:
# Count the samples of every label in each detector.
# value_counts() scans each frame once, instead of building a boolean mask
# over the whole frame for every single label.
counts_H1 = H1_confident['ml_label'].value_counts()
counts_L1 = L1_confident['ml_label'].value_counts()

# label -> combined (H1 + L1) number of samples. The original cell created
# this dict but never filled it.
total_samples = {}
for idx, label in enumerate(labels):
    # A label that is absent from a detector counts as 0 samples there.
    n_samples_H1 = int(counts_H1.get(label, 0))
    n_samples_L1 = int(counts_L1.get(label, 0))
    total_samples[label] = n_samples_H1 + n_samples_L1
    # Note the column order of the printout: L1 first, then H1.
    print(f'{idx:<2}: {label:<20}: {n_samples_L1:>5} : {n_samples_H1:>5} (L1/H1) samples')

0 : 1080Lines           :  1343 :   354 (L1/H1) samples
1 : 1400Ripples         :   185 :   208 (L1/H1) samples
2 : Air_Compressor      :  2164 :   139 (L1/H1) samples
3 : Blip                :  3321 :  4123 (L1/H1) samples
4 : Blip_Low_Frequency  : 11945 :  2845 (L1/H1) samples
5 : Chirp               :    20 :    23 (L1/H1) samples
6 : Extremely_Loud      :  4412 :  7082 (L1/H1) samples
7 : Fast_Scattering     : 52374 :  2154 (L1/H1) samples
8 : Helix               :   106 :    44 (L1/H1) samples
9 : Koi_Fish            :  4744 :  5049 (L1/H1) samples
10: Light_Modulation    :   372 :    93 (L1/H1) samples
11: Low_Frequency_Burst :  4458 :  2785 (L1/H1) samples
12: Low_Frequency_Lines : 11938 :  2702 (L1/H1) samples
13: No_Glitch           :  4201 :  1075 (L1/H1) samples
14: Paired_Doves        :  3066 :   265 (L1/H1) samples
15: Power_Line          :   627 :   144 (L1/H1) samples
16: Repeating_Blips     :   806 :   972 (L1/H1) samples
17: Scattered_Light     : 52935 : 64968 (L1/H1) 

In [None]:
# Draw a small random sample of glitch ids for every (label, detector) pair
# and collect them in one table with columns: id, label, detector.

# Seeded generator so that Restart & Run All reproduces the same sample.
rng = np.random.default_rng(42)
# Maximum number of ids drawn per (label, detector) pair.
samples_per_group = 5

detectors = ['H1', 'L1']
detector_dfs = {'H1': H1_confident, 'L1': L1_confident}

records = []
for label in labels:
    for detector in detectors:
        detector_df = detector_dfs[detector]
        candidate_ids = detector_df.loc[detector_df['ml_label'] == label, 'gravityspy_id'].to_numpy()
        if len(candidate_ids) >= samples_per_group:
            # Enough candidates: sample without replacement.
            ids = rng.choice(candidate_ids, size=samples_per_group, replace=False)
        else:
            # Fewer candidates than requested (possibly none): keep them all.
            # An explicit size check replaces the original try/except ValueError,
            # which could also have hidden unrelated errors.
            ids = candidate_ids
        records.extend((id_, label, detector) for id_ in ids)

# Build the frame once instead of appending to it row by row.
meta_data = pd.DataFrame(records, columns=['id', 'label', 'detector'])
meta_data

Unnamed: 0,id,label,detector
0,4OtG8h8wwc,1080Lines,H1
1,ZV2x9fosuY,1080Lines,H1
2,sHNdaM7ELO,1080Lines,H1
3,diwmOJX3qG,1080Lines,H1
4,rjvZI2ebvG,1080Lines,H1
...,...,...,...
220,H23FqDLgaQ,Whistle,L1
221,HKJ9M8VytN,Whistle,L1
222,IAjGDHqPDT,Whistle,L1
223,dkIL7Jw0bC,Whistle,L1


In [None]:
# Detector name -> pandas frame of that detector's glitches.
detector_dfs = dict(H1=H1_confident, L1=L1_confident)

In [None]:
# For every label, the 5 H1 glitches listed first after sorting by
# descending classifier confidence.
top_H1 = (
    detector_dfs['H1']
    .sort_values('ml_confidence', ascending=False)
    .groupby('ml_label')
    .head(5)
    .loc[:, ['gravityspy_id', 'ml_label', 'ml_confidence']]
)
top_H1

Unnamed: 0,gravityspy_id,ml_label,ml_confidence
61228,j64jal7FbQ,Extremely_Loud,1.000000
25602,tjjdnn7FGm,Whistle,1.000000
45512,Nche0RmYOc,Extremely_Loud,1.000000
55964,bRr1xFxXip,Whistle,1.000000
95266,SMRkR6zSv6,Extremely_Loud,1.000000
...,...,...,...
64110,kwYnhvabQ8,Chirp,0.944102
27847,ntDGl3DMIq,Wandering_Line,0.886548
62904,SEvX8ptPmW,Chirp,0.849025
10720,6Y7EmTGyo4,Wandering_Line,0.801918


In [None]:
# For every label, the 5 L1 glitches listed first after sorting by
# descending classifier confidence.
# Fix: the original read detector_dfs['H1'] here (copy-paste slip), which made
# top_L1 an exact duplicate of top_H1.
top_L1 = (
    detector_dfs['L1']
    .sort_values('ml_confidence', ascending=False)
    .groupby('ml_label')
    .head(5)
    [['gravityspy_id', 'ml_label', 'ml_confidence']]
)
top_L1

Unnamed: 0,gravityspy_id,ml_label,ml_confidence
61228,j64jal7FbQ,Extremely_Loud,1.000000
25602,tjjdnn7FGm,Whistle,1.000000
45512,Nche0RmYOc,Extremely_Loud,1.000000
55964,bRr1xFxXip,Whistle,1.000000
95266,SMRkR6zSv6,Extremely_Loud,1.000000
...,...,...,...
64110,kwYnhvabQ8,Chirp,0.944102
27847,ntDGl3DMIq,Wandering_Line,0.886548
62904,SEvX8ptPmW,Chirp,0.849025
10720,6Y7EmTGyo4,Wandering_Line,0.801918


In [None]:
# Stack the per-detector top lists. The selected columns carry no detector
# information, so each row is tagged with its source detector through an
# outer index level; the inner level keeps the row label of the source frame.
pd.concat([top_H1, top_L1], keys=['H1', 'L1'], names=['detector', 'row'])

Unnamed: 0,gravityspy_id,ml_label,ml_confidence
61228,j64jal7FbQ,Extremely_Loud,1.000000
25602,tjjdnn7FGm,Whistle,1.000000
45512,Nche0RmYOc,Extremely_Loud,1.000000
55964,bRr1xFxXip,Whistle,1.000000
95266,SMRkR6zSv6,Extremely_Loud,1.000000
...,...,...,...
64110,kwYnhvabQ8,Chirp,0.944102
27847,ntDGl3DMIq,Wandering_Line,0.886548
62904,SEvX8ptPmW,Chirp,0.849025
10720,6Y7EmTGyo4,Wandering_Line,0.801918


In [None]:
# Show the H1 row(s) stored under Gravity Spy id 'deNCXlLADm'.
detector_dfs['H1'].loc[lambda frame: frame['gravityspy_id'] == 'deNCXlLADm']

Unnamed: 0,event_time,ifo,peak_time,peak_time_ns,start_time,start_time_ns,duration,peak_frequency,central_freq,bandwidth,...,Tomte,Violin_Mode,Wandering_Line,Whistle,ml_label,ml_confidence,url1,url2,url3,url4
63873,1268205000.0,H1,1268204583,695312023,1268204583,0,1.5,27.308241,3463.896729,6897.204102,...,2.714552e-09,2.496165e-08,9.3099e-11,9.696328e-14,Paired_Doves,0.999732,https://panoptes-uploads.zooniverse.org/produc...,https://panoptes-uploads.zooniverse.org/produc...,https://panoptes-uploads.zooniverse.org/produc...,https://panoptes-uploads.zooniverse.org/produc...


In [None]:
# Show the H1 row(s) stored under Gravity Spy id 'HjCOMZ9pYz'.
detector_dfs['H1'].loc[lambda frame: frame['gravityspy_id'] == 'HjCOMZ9pYz']

Unnamed: 0,event_time,ifo,peak_time,peak_time_ns,start_time,start_time_ns,duration,peak_frequency,central_freq,bandwidth,...,Tomte,Violin_Mode,Wandering_Line,Whistle,ml_label,ml_confidence,url1,url2,url3,url4
97156,1268205000.0,H1,1268204583,695312023,1268204583,0,1.5,27.308241,3463.896729,6897.204102,...,2.714552e-09,2.496165e-08,9.3099e-11,9.696328e-14,Paired_Doves,0.999732,https://panoptes-uploads.zooniverse.org/produc...,https://panoptes-uploads.zooniverse.org/produc...,https://panoptes-uploads.zooniverse.org/produc...,https://panoptes-uploads.zooniverse.org/produc...


In [None]:
# Display only the detector and label columns of the H1 frame.
H1_confident.loc[:, ['ifo', 'ml_label']]

Unnamed: 0,ifo,ml_label
0,H1,Scattered_Light
1,H1,Scattered_Light
2,H1,Blip_Low_Frequency
3,H1,Scattered_Light
4,H1,Scattered_Light
...,...,...
99563,H1,Extremely_Loud
99564,H1,Blip
99565,H1,Scattered_Light
99566,H1,Scattered_Light
