In [1]:
import sys
import os
import subprocess

app_base_dir = '/home/eusobg/EUSO-SPB/euso-spb-patt-reco-v1'
if app_base_dir not in sys.path:
    sys.path.append(app_base_dir)

import re
# import collections
import numpy as np
import psycopg2 as pg
import pandas as pd
import pandas.io.sql as psql
import getpass
import matplotlib as mpl
import argparse
import glob
# from tqdm import tqdm
import traceback
import hashlib

from utility_funtions import str2bool_argparse


def __check_agg():
    """Select the non-interactive 'Agg' matplotlib backend unless plots were requested.

    Scans ``sys.argv[1:]`` for the ``--show-plots`` option, accepted both as
    ``--show-plots VALUE`` and as ``--show-plots=VALUE``. Only when the option
    is present is its value parsed (with ``str2bool_argparse``); unrelated
    arguments are ignored via ``parse_known_args``. When the option is absent
    or parses to a false value, ``mpl.use('Agg')`` is called.

    Intended to be called before ``matplotlib.pyplot`` is imported.
    """
    show_plots = False
    cli_args = sys.argv[1:]
    # The original membership test only recognised the space-separated form;
    # the '=' form is also valid argparse syntax and must not be ignored.
    option_given = any(
        arg == '--show-plots' or arg.startswith('--show-plots=')
        for arg in cli_args
    )
    if option_given:
        args_parser = argparse.ArgumentParser(description='')
        args_parser.add_argument('--show-plots',type=str2bool_argparse,default=False,help='If true, plots are only showed in windows')
        # parse_known_args: other command-line arguments are left alone.
        args, _ = args_parser.parse_known_args(cli_args)
        show_plots = args.show_plots
    if not show_plots:
        mpl.use('Agg')


# Choose the matplotlib backend first: __check_agg() may call mpl.use('Agg'),
# and it is invoked here ahead of the matplotlib.pyplot import below.
__check_agg()


# Default figure resolution (dots per inch) set through matplotlib's rcParams.
mpl.rcParams['figure.dpi'] = 80

import matplotlib.pyplot as plt

#import ROOT

# import tool.npy_frames_visualization as npy_vis
import tool.acqconv
import data_analysis_utils
import supervised_classification as supc

Welcome to JupyROOT 6.10/00


In [2]:
import event_recognition_efficiency

In [3]:
# Open the PostgreSQL connection and cursor used by the queries below.
# The password is not stored in the notebook: it is read from the PGPASSWORD
# environment variable, or requested interactively when that variable is unset.
db_password = os.environ.get('PGPASSWORD') or getpass.getpass('Password for eusospb@localhost/eusospb_data: ')
con = pg.connect(dbname='eusospb_data', user='eusospb', host='localhost', password=db_password)
cur = con.cursor()

In [4]:
# Selection rules obtained with min_gtu=13. The value is used further down as
# the `additional_conditions` argument of the query builders and is wrapped in
# 'NOT (...)' to select the complementary set of events.
cond_selection_rules = event_recognition_efficiency.get_selection_rules(min_gtu=13)

In [28]:
# Columns requested for every simulated-shower sample loaded in this cell.
# Each name is listed exactly once: a repeated name in the column list can
# yield duplicate column labels in the resulting DataFrames.
labeled_events_columns = [
    'event_id', 'source_file_acquisition_full', 'source_file_trigger_full', 'packet_id', 'num_gtu', 'gtu_in_packet', 'num_triggered_pixels',
    'gtu_y_hough__peak_thr2_avg_phi', 'gtu_x_hough__peak_thr2_avg_phi', 'gtu_y_hough__peak_thr3_avg_phi', 'gtu_x_hough__peak_thr3_avg_phi',
    'trigg_x_y_hough__peak_thr1__max_cluster_counts_sum_width', 'trigg_gtu_x_hough__peak_thr1__max_cluster_counts_sum_width', 'trigg_gtu_y_hough__peak_thr1__max_cluster_counts_sum_width',
    'gtu_y_hough__peak_thr1__max_cluster_counts_sum_width', 'gtu_x_hough__peak_thr1__max_cluster_counts_sum_width', 'x_y_hough__peak_thr1__max_cluster_counts_sum_width',
    'trigg_x_y_hough__dbscan_num_clusters_above_thr1', 'trigg_gtu_y_hough__dbscan_num_clusters_above_thr1', 'trigg_gtu_x_hough__dbscan_num_clusters_above_thr1',
    'gtu_y_hough__dbscan_num_clusters_above_thr1', 'gtu_x_hough__dbscan_num_clusters_above_thr1',
    'etruth_trueenergy', 'egeometry_pos_z',
]

# Positional arguments forwarded unchanged to supc.get_query__select_simu_events
# for each sample. NOTE(review): their meaning is defined by that function and
# is not visible here; the invisible-showers selection passes four values where
# the other two pass five, exactly as before.
_less34_visible_showers_args = (3, 4, 3, 800, 3)
_visible_showers_args = (5, 999, 3, 800, 3)
_invisible_showers_args = (0, 2, 0, 1)


def _read_simu_events(selection_args, additional_conditions=None):
    """Build one simulated-events query and load its result as a DataFrame.

    Parameters
    ----------
    selection_args : tuple
        Positional arguments passed as-is to ``supc.get_query__select_simu_events``.
    additional_conditions : str or None
        Forwarded as the ``additional_conditions`` keyword only when not None,
        so that the query builder's own default applies otherwise.

    Returns
    -------
    pandas.DataFrame
        Rows returned by ``psql.read_sql`` on the notebook connection ``con``
        (the query is built with limit=10000, offset=0).
    """
    query_kwargs = dict(columns=labeled_events_columns, limit=10000, offset=0)
    if additional_conditions is not None:
        query_kwargs['additional_conditions'] = additional_conditions
    query = supc.get_query__select_simu_events(*selection_args, **query_kwargs)
    return psql.read_sql(query, con)


# Complement of the selection rules, used for the *_not_cond samples.
cond_selection_rules_not = 'NOT ('+cond_selection_rules+' )'

# All events of each sample, without additional conditions.
less34_visible_showers_all = _read_simu_events(_less34_visible_showers_args)
visible_showers_all = _read_simu_events(_visible_showers_args)
invisible_showers_all = _read_simu_events(_invisible_showers_args)

# Events of each sample that satisfy the selection rules.
less34_visible_showers_cond = _read_simu_events(_less34_visible_showers_args, cond_selection_rules)
visible_showers_cond = _read_simu_events(_visible_showers_args, cond_selection_rules)
invisible_showers_cond = _read_simu_events(_invisible_showers_args, cond_selection_rules)

# Events of each sample that do NOT satisfy the selection rules.
less34_visible_showers_not_cond = _read_simu_events(_less34_visible_showers_args, cond_selection_rules_not)
visible_showers_not_cond = _read_simu_events(_visible_showers_args, cond_selection_rules_not)
invisible_showers_not_cond = _read_simu_events(_invisible_showers_args, cond_selection_rules_not)

# The low_energy_in_pmt and led training-data queries
# (supc.get_query__select_training_data__low_energy_in_pmt / __led) are not run in this cell.

# process_simu_group_statistics(visible_showers_cond, visible_showers_not_cond, visible_showers_all,
#                               'visible_showers_cond', 'visible_showers_not_cond', 'visible_showers_all', 'VISIBLE SHOWERS',
#                               save_csv_dir, save_fig_dir, save_npy_dir, args)

# process_simu_group_statistics(less34_visible_showers_cond, less34_visible_showers_not_cond, less34_visible_showers_all,
#                               'less34_visible_showers_cond', 'less34_visible_showers_not_cond', 'less34_visible_showers_all', 'LESS34 VISIBLE SHOWERS',
#                               save_csv_dir, save_fig_dir, save_npy_dir, args)

# process_simu_group_statistics(invisible_showers_cond, invisible_showers_not_cond, invisible_showers_all,
#                               'invisible_showers_cond', 'invisible_showers_not_cond', 'invisible_showers_all', 'INVISIBLE SHOWERS',
#                               save_csv_dir, save_fig_dir, save_npy_dir, args)

# process_simu_group_statistics(low_energy_in_pmt_cond, low_energy_in_pmt_not_cond, low_energy_in_pmt_all,
#                               'low_energy_in_pmt_cond', 'low_energy_in_pmt_not_cond', 'low_energy_in_pmt_all', 'CHARGED PARTICLES',
#                               save_csv_dir, save_fig_dir, args)

# process_simu_group_statistics(led_cond, led_not_cond, led_all,
#                               'led_cond', 'led_not_cond', 'led_all', 'LED',
#                               save_csv_dir, save_fig_dir, args)


In [29]:
# Columns requested for the low-energy-in-PMT sample. This rebinds
# `labeled_events_columns`: compared with the simulated-shower list it adds
# 'global_gtu' and 'x_y_active_pixels_num' and does not request
# 'etruth_trueenergy' / 'egeometry_pos_z'.
# Each name is listed exactly once: a repeated name in the column list can
# yield duplicate column labels in the resulting DataFrame.
labeled_events_columns = [
    'event_id', 'source_file_acquisition_full', 'source_file_trigger_full', 'global_gtu', 'packet_id', 'gtu_in_packet', 'num_gtu', 'num_triggered_pixels',
    'gtu_y_hough__peak_thr2_avg_phi', 'gtu_x_hough__peak_thr2_avg_phi', 'gtu_y_hough__peak_thr3_avg_phi', 'gtu_x_hough__peak_thr3_avg_phi',
    'trigg_x_y_hough__peak_thr1__max_cluster_counts_sum_width', 'trigg_gtu_x_hough__peak_thr1__max_cluster_counts_sum_width', 'trigg_gtu_y_hough__peak_thr1__max_cluster_counts_sum_width',
    'gtu_y_hough__peak_thr1__max_cluster_counts_sum_width', 'gtu_x_hough__peak_thr1__max_cluster_counts_sum_width', 'x_y_hough__peak_thr1__max_cluster_counts_sum_width',
    'trigg_x_y_hough__dbscan_num_clusters_above_thr1', 'trigg_gtu_y_hough__dbscan_num_clusters_above_thr1', 'trigg_gtu_x_hough__dbscan_num_clusters_above_thr1',
    'gtu_y_hough__dbscan_num_clusters_above_thr1', 'gtu_x_hough__dbscan_num_clusters_above_thr1', 'x_y_active_pixels_num',
]
# The query text is kept in its own variable so it can be inspected separately from the result.
low_energy_in_pmt_all_query = supc.get_query__select_training_data__low_energy_in_pmt(columns=labeled_events_columns, limit=10000, offset=0)
low_energy_in_pmt_all = psql.read_sql(low_energy_in_pmt_all_query, con) # maybe too many gtu ?
# led_all = psql.read_sql(supc.get_query__select_training_data__led(columns=labeled_events_columns, limit=10000, offset=0), con)

In [30]:
# Number of rows returned by the low-energy-in-PMT query (built with limit=10000).
len(low_energy_in_pmt_all)

7067

In [31]:
# Rows whose 'x_y_active_pixels_num' exceeds 256*7.
# NOTE(review): presumably seven units of 256 pixels, as the "_7ec" suffix suggests — confirm.
low_energy_in_pmt_all_7ec = low_energy_in_pmt_all.loc[
    low_energy_in_pmt_all['x_y_active_pixels_num'].gt(256 * 7)
]

In [32]:
# Number of rows left after the x_y_active_pixels_num > 256*7 filter.
len(low_energy_in_pmt_all_7ec)

5527

In [33]:
# Preview only the first rows instead of dumping the whole filtered frame into the cell output.
low_energy_in_pmt_all_7ec.head(10)

Unnamed: 0,event_id,source_file_acquisition_full,source_file_trigger_full,global_gtu,packet_id,gtu_in_packet,num_gtu,num_triggered_pixels,gtu_y_hough__peak_thr2_avg_phi,gtu_x_hough__peak_thr2_avg_phi,...,gtu_y_hough__peak_thr1__max_cluster_counts_sum_width,gtu_x_hough__peak_thr1__max_cluster_counts_sum_width,trigg_x_y_hough__peak_thr1__max_cluster_counts_sum_width,x_y_hough__peak_thr1__max_cluster_counts_sum_width,trigg_x_y_hough__dbscan_num_clusters_above_thr1,trigg_gtu_y_hough__dbscan_num_clusters_above_thr1,trigg_gtu_x_hough__dbscan_num_clusters_above_thr1,gtu_y_hough__dbscan_num_clusters_above_thr1,gtu_x_hough__dbscan_num_clusters_above_thr1,x_y_active_pixels_num
0,87144,/media/node15_data2/SPBDATA/allpackets-SPBEUSO...,/home/eusobg/EUSO-SPB/SPBDATA_processed/allpac...,8103,63,39,11,483,-0.010349,0.010136,...,11.0,9.0,13.0,12.0,2,1,1,1.0,1.0,2290
1,247771,/media/node15_data2/SPBDATA/allpackets-SPBEUSO...,/home/eusobg/EUSO-SPB/SPBDATA_processed/allpac...,21928,171,40,10,482,-0.000293,0.010426,...,10.0,10.0,18.0,15.0,1,1,1,1.0,1.0,2297
2,28703,/media/node15_data2/SPBDATA/allpackets-SPBEUSO...,/home/eusobg/EUSO-SPB/SPBDATA_processed/allpac...,4136,32,40,10,472,-0.017651,-0.014080,...,11.0,9.0,32.0,15.0,2,1,1,1.0,1.0,2294
3,117458,/media/node15_data2/SPBDATA/allpackets-SPBEUSO...,/home/eusobg/EUSO-SPB/SPBDATA_processed/allpac...,4008,31,40,12,465,-0.000198,-0.012352,...,12.0,12.0,7.0,30.0,1,1,1,1.0,1.0,2292
4,133168,/media/node15_data2/SPBDATA/allpackets-SPBEUSO...,/home/eusobg/EUSO-SPB/SPBDATA_processed/allpac...,16295,127,39,10,448,-0.022676,-0.013946,...,10.0,9.0,13.0,9.0,1,1,1,1.0,1.0,2290
5,225442,/media/node15_data2/SPBDATA/allpackets-SPBEUSO...,/home/eusobg/EUSO-SPB/SPBDATA_processed/allpac...,13224,103,40,12,444,-0.011757,-0.002813,...,10.0,10.0,40.0,12.0,2,1,1,1.0,1.0,2290
6,225283,/media/node15_data2/SPBDATA/allpackets-SPBEUSO...,/home/eusobg/EUSO-SPB/SPBDATA_processed/allpac...,9512,74,40,10,440,-0.000452,-0.017419,...,11.0,9.0,22.0,14.0,1,1,1,1.0,1.0,2292
7,209345,/media/node15_data2/SPBDATA/allpackets-SPBEUSO...,/home/eusobg/EUSO-SPB/SPBDATA_processed/allpac...,8231,64,39,10,438,-0.000224,-0.007618,...,11.0,10.0,43.0,17.0,1,1,1,1.0,1.0,2291
8,230452,/media/node15_data2/SPBDATA/allpackets-SPBEUSO...,/home/eusobg/EUSO-SPB/SPBDATA_processed/allpac...,4391,34,39,10,438,-0.014281,-0.007690,...,9.0,10.0,19.0,12.0,2,1,1,1.0,1.0,2292
9,58936,/media/node15_data2/SPBDATA/allpackets-SPBEUSO...,/home/eusobg/EUSO-SPB/SPBDATA_processed/allpac...,5544,43,40,11,435,0.011060,-0.003433,...,8.0,10.0,26.0,21.0,1,2,1,2.0,1.0,2292


In [34]:
# Preview only the first rows instead of dumping the whole query result into the cell output.
low_energy_in_pmt_all.head(10)

Unnamed: 0,event_id,source_file_acquisition_full,source_file_trigger_full,global_gtu,packet_id,gtu_in_packet,num_gtu,num_triggered_pixels,gtu_y_hough__peak_thr2_avg_phi,gtu_x_hough__peak_thr2_avg_phi,...,gtu_y_hough__peak_thr1__max_cluster_counts_sum_width,gtu_x_hough__peak_thr1__max_cluster_counts_sum_width,trigg_x_y_hough__peak_thr1__max_cluster_counts_sum_width,x_y_hough__peak_thr1__max_cluster_counts_sum_width,trigg_x_y_hough__dbscan_num_clusters_above_thr1,trigg_gtu_y_hough__dbscan_num_clusters_above_thr1,trigg_gtu_x_hough__dbscan_num_clusters_above_thr1,gtu_y_hough__dbscan_num_clusters_above_thr1,gtu_x_hough__dbscan_num_clusters_above_thr1,x_y_active_pixels_num
0,87144,/media/node15_data2/SPBDATA/allpackets-SPBEUSO...,/home/eusobg/EUSO-SPB/SPBDATA_processed/allpac...,8103,63,39,11,483,-0.010349,1.013590e-02,...,11.0,9.0,13.0,12.0,2,1,1,1.0,1.0,2290
1,247771,/media/node15_data2/SPBDATA/allpackets-SPBEUSO...,/home/eusobg/EUSO-SPB/SPBDATA_processed/allpac...,21928,171,40,10,482,-0.000293,1.042566e-02,...,10.0,10.0,18.0,15.0,1,1,1,1.0,1.0,2297
2,28703,/media/node15_data2/SPBDATA/allpackets-SPBEUSO...,/home/eusobg/EUSO-SPB/SPBDATA_processed/allpac...,4136,32,40,10,472,-0.017651,-1.407998e-02,...,11.0,9.0,32.0,15.0,2,1,1,1.0,1.0,2294
3,117458,/media/node15_data2/SPBDATA/allpackets-SPBEUSO...,/home/eusobg/EUSO-SPB/SPBDATA_processed/allpac...,4008,31,40,12,465,-0.000198,-1.235157e-02,...,12.0,12.0,7.0,30.0,1,1,1,1.0,1.0,2292
4,133168,/media/node15_data2/SPBDATA/allpackets-SPBEUSO...,/home/eusobg/EUSO-SPB/SPBDATA_processed/allpac...,16295,127,39,10,448,-0.022676,-1.394575e-02,...,10.0,9.0,13.0,9.0,1,1,1,1.0,1.0,2290
5,225442,/media/node15_data2/SPBDATA/allpackets-SPBEUSO...,/home/eusobg/EUSO-SPB/SPBDATA_processed/allpac...,13224,103,40,12,444,-0.011757,-2.812838e-03,...,10.0,10.0,40.0,12.0,2,1,1,1.0,1.0,2290
6,225283,/media/node15_data2/SPBDATA/allpackets-SPBEUSO...,/home/eusobg/EUSO-SPB/SPBDATA_processed/allpac...,9512,74,40,10,440,-0.000452,-1.741921e-02,...,11.0,9.0,22.0,14.0,1,1,1,1.0,1.0,2292
7,209345,/media/node15_data2/SPBDATA/allpackets-SPBEUSO...,/home/eusobg/EUSO-SPB/SPBDATA_processed/allpac...,8231,64,39,10,438,-0.000224,-7.618000e-03,...,11.0,10.0,43.0,17.0,1,1,1,1.0,1.0,2291
8,230452,/media/node15_data2/SPBDATA/allpackets-SPBEUSO...,/home/eusobg/EUSO-SPB/SPBDATA_processed/allpac...,4391,34,39,10,438,-0.014281,-7.690025e-03,...,9.0,10.0,19.0,12.0,2,1,1,1.0,1.0,2292
9,58936,/media/node15_data2/SPBDATA/allpackets-SPBEUSO...,/home/eusobg/EUSO-SPB/SPBDATA_processed/allpac...,5544,43,40,11,435,0.011060,-3.432526e-03,...,8.0,10.0,26.0,21.0,1,2,1,2.0,1.0,2292


In [35]:
# Root directory under which the symlink loop below creates one sub-tree per sample name.
outdir='/home/eusobg/EUSO-SPB/spb_simu_labeled'

In [37]:
# Mirror the source files of every selected sample as symlinks under
# <outdir>/<sample name>/<directory of the file with the simulation prefix removed>/.
_simu_processed_prefix = '/home/eusobg/EUSO-SPB/SPBDATA_processed/spb_simu/'


def _link_source_file(source_path, group_dir):
    """Create a symlink to ``source_path`` inside ``group_dir``.

    The link is placed in a sub-directory equal to the directory of
    ``source_path`` with ``_simu_processed_prefix`` removed. Any leading path
    separator left after that removal is stripped, because ``os.path.join``
    would otherwise discard ``group_dir`` and point back at the source
    directory itself. Missing directories are created.

    An existing entry at the link location is left untouched. ``lexists`` is
    used so that a dangling symlink also counts as existing; ``os.path.exists``
    reports False for it and ``os.symlink`` would then raise FileExistsError.
    """
    relative_dir = os.path.dirname(source_path).replace(_simu_processed_prefix, '').lstrip(os.sep)
    target_dir = os.path.join(group_dir, relative_dir)
    os.makedirs(target_dir, exist_ok=True)
    target_path = os.path.join(target_dir, os.path.basename(source_path))
    if not os.path.lexists(target_path):
        os.symlink(source_path, target_path)


for k, data in {
    'less34_visible_showers_all': less34_visible_showers_all,
    'visible_showers_all': visible_showers_all,
    'invisible_showers_all': invisible_showers_all,
    'less34_visible_showers_cond': less34_visible_showers_cond,
    'visible_showers_cond': visible_showers_cond,
    'invisible_showers_cond': invisible_showers_cond,
    'less34_visible_showers_not_cond': less34_visible_showers_not_cond,
    'visible_showers_not_cond': visible_showers_not_cond,
    'invisible_showers_not_cond': invisible_showers_not_cond
}.items():
    print("{}({})".format(k,len(data)))
    group_dir = os.path.join(outdir, k)
    for i, entry in data.iterrows():
        # Progress marker every 100 rows (i is the DataFrame index label).
        if i % 100 == 0:
            print("\t{}".format(i))

        _link_source_file(entry['source_file_acquisition_full'], group_dir)
        _link_source_file(entry['source_file_trigger_full'], group_dir)
        



less34_visible_showers_all(1124)
	0
	100
	200
	300
	400
	500
	600
	700
	800
	900
	1000
	1100
visible_showers_all(4011)
	0
	100
	200
	300
	400
	500
	600
	700
	800
	900
	1000
	1100
	1200
	1300
	1400
	1500
	1600
	1700
	1800
	1900
	2000
	2100
	2200
	2300
	2400
	2500
	2600
	2700
	2800
	2900
	3000
	3100
	3200
	3300
	3400
	3500
	3600
	3700
	3800
	3900
	4000
visible_showers_not_cond(1008)
	0
	100
	200
	300
	400
	500
	600
	700
	800
	900
	1000
invisible_showers_all(10000)
	0
	100
	200
	300
	400
	500
	600
	700
	800
	900
	1000
	1100
	1200
	1300
	1400
	1500
	1600
	1700
	1800
	1900
	2000
	2100
	2200
	2300
	2400
	2500
	2600
	2700
	2800
	2900
	3000
	3100
	3200
	3300
	3400
	3500
	3600
	3700
	3800
	3900
	4000
	4100
	4200
	4300
	4400
	4500
	4600
	4700
	4800
	4900
	5000
	5100
	5200
	5300
	5400
	5500
	5600
	5700
	5800
	5900
	6000
	6100
	6200
	6300
	6400
	6500
	6600
	6700
	6800
	6900
	7000
	7100
	7200
	7300
	7400
	7500
	7600
	7700
	7800
	7900
	8000
	8100
	8200
	8300
	8400
	8500
	8600
	8700
	8800
	8900
	9000