In [1]:
import ROOT
import os
import sys
import glob
import collections
import re
import numpy as np
import pandas as pd
import sys

Welcome to JupyROOT 6.13/02


In [2]:
data_snippets_dir = 'utah_events_directory_analysis'
os.makedirs(data_snippets_dir, exist_ok=True)

In [3]:
# utah_files_glob = '/home/spbproc/SPBDATA_utah/*'
original_utah_files_dir = '/mnt/data_sgbc1/SPBDATA/SPB_Utah'
original_utah_files_dirs = original_utah_files_dir + '/[0-9]*'

In [4]:
# l = [f for f in glob.glob(original_utah_files_dir+'/[0-9]*/*.root', recursive=True)]
# l = [os.path.join(dp, f) for dp, dn, fn in os.walk(original_utah_files_dir) for f in fn if f.endswith('.root')]

# Collect every *.root file found anywhere below the numbered acquisition
# directories matched by `original_utah_files_dirs`.
root_files = []

for acq_dir in glob.glob(original_utah_files_dirs):
    # The glob may also match plain files whose name starts with a digit.
    if os.path.isdir(acq_dir):
        for dir_path, _subdir_names, file_names in os.walk(acq_dir):
            root_files.extend(
                os.path.join(dir_path, file_name)
                for file_name in file_names
                if file_name.endswith('.root')
            )

In [5]:
print(len(root_files))

988


In [6]:
# for p in sorted(root_files):
#     print(p)

In [7]:
# [[(w1, distance.levenshtein(w1,w2)) for w1 in root_files] for w2 in [root_files[0]]]

In [8]:
num_events_counts_file_pathname = os.path.join(data_snippets_dir, 'num_events_dict.tsv')
print(num_events_counts_file_pathname)

utah_events_directory_analysis/num_events_dict.tsv


In [9]:
# Number of entries of the 'tevent' tree for every ROOT file, keyed by file
# pathname. The value is None when the file could not be opened or has no
# 'tevent' object. Counts are cached in a TSV file because opening every file
# is slow; delete the TSV to force a rescan.
num_events_dict = collections.OrderedDict()
if not os.path.exists(num_events_counts_file_pathname):
    for i, p in enumerate(sorted(root_files)):
        if i % 10 == 0:
            # Progress indicator, flushed so it shows up while the loop runs.
            print('{}/{}'.format(i+1, len(root_files)))
            sys.stdout.flush()
        f = ROOT.TFile.Open(p)
        ent = None
        if f and not f.IsZombie():
            t = f.Get('tevent')
            if t:
                ent = t.GetEntries()
            f.Close()
        if f:
            del f
        num_events_dict[p] = ent

    # One "<pathname>\t<count or None>" line per file.
    with open(num_events_counts_file_pathname, 'w') as f:
        for k, v in num_events_dict.items():
            print('{}\t{}'.format(k, str(v)), file=f)

else:
    print('Loading existing table:', num_events_counts_file_pathname)
    with open(num_events_counts_file_pathname, 'r') as f:
        for l in f:
            # Strip only the line terminator: slicing off the last character
            # would truncate the count of a final line without a newline.
            l = l.rstrip('\r\n')
            if not l:
                # Tolerate blank lines (e.g. after a manual edit of the table).
                continue
            c = l.split('\t')
            num_events_dict[c[0]] = int(c[1]) if c[1] != 'None' else None

Loading existing table: utah_events_directory_analysis/num_events_dict.tsv


In [10]:
np.sum([1 for v in num_events_dict.values() if v is not None])

795

In [11]:
# Group the per-file event counts by the directory that contains the file:
# {directory: {file pathname: count}}.
num_events_by_dirname_dict = collections.OrderedDict()
for file_pathname, num_events in num_events_dict.items():
    files_in_dir = num_events_by_dirname_dict.setdefault(
        os.path.dirname(file_pathname), collections.OrderedDict())
    files_in_dir[file_pathname] = num_events

In [12]:
# for d, sub_dict in num_events_by_dirname_dict.items():
#     print(d)
#     for p, c in sub_dict.items():
#         print('\t{}\t{}'.format(p,c))

In [13]:
# Build a tree {directory: {suffix group: {...: {file pathname: count}}}}.
# The groups come from the text between '--' and '.root' in the file name,
# split on '_' and then, where a part looks like <letters><digits><rest>,
# split further into those pieces.
num_events_by_dirname_by_suffix_dict = collections.OrderedDict()

for file_pathname, num_events in num_events_dict.items():

    dir_node = num_events_by_dirname_by_suffix_dict.setdefault(
        os.path.dirname(file_pathname), collections.OrderedDict())

    suffix_match = re.search(r'--(.*)\.root$', file_pathname)
    suffix = suffix_match.group(1) if suffix_match else None

    group_keys = []
    for suffix_part in (suffix.split('_') if suffix else []):
        part_match = re.search(r'^(\d*[a-zA-Z_]{2,})(\d+)([^0-9].*)?$', suffix_part)
        if part_match:
            # The optional trailing group is None when absent; skip it.
            group_keys.extend(piece for piece in part_match.groups() if piece is not None)
        else:
            group_keys.append(suffix_part)

    # Descend (creating nodes as needed) along the group keys and store the
    # file as a leaf of the deepest node.
    node = dir_node
    for group_key in group_keys:
        node = node.setdefault(group_key, collections.OrderedDict())

    node[file_pathname] = num_events

In [14]:
def remove_prefix(source_str, removed_prefix):
    """Strip a leading path prefix, and the separator that follows it, from a string.

    Args:
        source_str: the string (typically a pathname) to shorten.
        removed_prefix: the prefix to remove; may or may not end with a path
            separator. An empty or None prefix leaves the string unchanged.

    Returns:
        ``source_str`` without the prefix and without the single path
        separator directly after it; ``source_str`` unchanged when it does not
        start with the prefix.
    """
    if not removed_prefix or not source_str.startswith(removed_prefix):
        return source_str

    remainder = source_str[len(removed_prefix):]
    # Drop at most one separator left over after the prefix. Never drop any
    # other character: the prefix may already end with a separator.
    if remainder[:1] in ('/', os.sep):
        remainder = remainder[1:]
    return remainder
    

def count_vals_recursively(base_dict):
    """Return the sum of all leaf values in a nested dict, counting None leaves as 0."""
    return sum(
        count_vals_recursively(value) if isinstance(value, dict)
        else (0 if value is None else value)
        for value in base_dict.values()
    )
                

def print_dicts_recursively(base_dict, removed_prefix=None, offset=0, width=120, tab_str='    ', 
                            max_depth=None, count_for_nodes=True, offset_numbers=True, 
                            translate_func=None):
    """Print a nested dict as an indented tree, one line per key.

    Args:
        base_dict: the (sub)tree to print; dict values are nodes, anything
            else is a leaf.
        removed_prefix: if not None, stripped from the start of every key via
            ``remove_prefix`` before printing.
        offset: current nesting level; controls the indentation.
        width: column width reserved for the key; None prints key and value
            back to back.
        tab_str: string repeated ``offset`` times to indent a key.
        max_depth: deepest level that is still printed (None = no limit).
        count_for_nodes: if true, print next to each node the sum of its
            leaves as computed by ``count_vals_recursively``.
        offset_numbers: if true (and ``width`` is set), indent the value
            column by the nesting level as well.
        translate_func: optional callable applied to node sums and to leaf
            values before printing. None leaves are printed as 'None' without
            being passed to it.
    """
    # Nothing below max_depth is printed, so there is no point in iterating.
    if max_depth is not None and offset > max_depth:
        return

    indent = tab_str * offset
    if width is None:
        line_format = '{}{}'
    else:
        line_format = '{:' + str(width) + '}' + (indent if offset_numbers else '') + '{}'

    translate = translate_func if callable(translate_func) else None

    for k, v in base_dict.items():
        if removed_prefix is not None:
            k = remove_prefix(k, removed_prefix)

        if isinstance(v, dict):
            k_str = indent + k + ':'
            if count_for_nodes:
                sum_count = count_vals_recursively(v)
                if translate is not None:
                    sum_count = translate(sum_count)
                print(line_format.format(k_str, sum_count))
            else:
                print(k_str)

            print_dicts_recursively(v, removed_prefix, offset+1, width, tab_str, 
                                    max_depth, count_for_nodes, offset_numbers, 
                                    translate_func)
        else:
            # A None leaf marks a file without a count; arithmetic translate
            # functions (e.g. lambda v: v/128) would raise on it.
            if translate is not None and v is not None:
                v = translate(v)
            print(line_format.format(indent + k, str(v)))

In [15]:
def remove_single_item_dicts(base_dict, offset=0, delimiter='_'):
    """Collapse chains of single-child dict nodes by joining their keys.

    Inside a dict with several items, every child whose subtree starts with a
    chain of single-item dicts is replaced by one entry whose key is the
    chain's keys joined with ``delimiter``.

    Args:
        base_dict: nested dict to compact; non-dict values are returned as is.
        offset: nesting level of the call; it is only passed along and does
            not influence the result.
        delimiter: string placed between joined keys, at every nesting level.

    Returns:
        - ``base_dict`` itself when it is not a dict, is empty, or holds a
          single non-dict value;
        - a ``(key, compacted_value)`` tuple when ``base_dict`` holds exactly
          one item whose value is a dict. Note that this also applies to the
          outermost call, which then returns a tuple rather than a dict;
        - otherwise a new OrderedDict with the compacted children.
    """
    if not isinstance(base_dict, dict) or len(base_dict) == 0:
        return base_dict

    if len(base_dict) == 1:
        only_key, only_value = next(iter(base_dict.items()))
        if not isinstance(only_value, dict):
            return base_dict
        # Hand the key up to the caller, which merges it into its own key.
        return only_key, remove_single_item_dicts(only_value, offset+1, delimiter)

    out_dict = collections.OrderedDict()
    for k, v in base_dict.items():
        if not isinstance(v, dict):
            out_dict[k] = v
            continue

        ret = remove_single_item_dicts(v, offset+1, delimiter)
        new_k = k
        # Unwrap the nested (key, rest) tuples produced by a chain of
        # single-item dicts, appending each key to the merged key.
        while isinstance(ret, tuple):
            new_k += delimiter + ret[0]
            ret = remove_single_item_dicts(ret[1], offset+1, delimiter)
        out_dict[new_k] = ret
    return out_dict

In [16]:
def filter_tree(base_dict, search_term_str):
    """Keep only the parts of a nested dict whose keys contain a search term.

    A key matches when any of the search terms is a substring of it. A
    matching key is kept together with its whole value (leaf or subtree); a
    non-matching dict node is kept only if something below it matches, and
    then only with its filtered content.

    Args:
        base_dict: nested dict with string keys.
        search_term_str: a single search string, or a list/tuple of them.

    Returns:
        ``(filtered_dict, found)``: an OrderedDict with the retained entries
        and a flag telling whether anything matched at this level or below.
    """
    if isinstance(search_term_str, (list, tuple)):
        terms = search_term_str
    else:
        terms = [search_term_str]

    kept = collections.OrderedDict()
    found = False

    for key, value in base_dict.items():
        if any(term in key for term in terms):
            # The key itself matches: keep the value unfiltered.
            kept[key] = value
            found = True
        elif isinstance(value, dict):
            sub_kept, sub_found = filter_tree(value, terms)
            if sub_found:
                kept[key] = sub_kept
                found = True

    return kept, found

In [17]:
def transform_dict_depths_recursively(
        base_dict,   
        max_depth=None, 
        item_func=None, key_join='_', 
        exclude_bottom_level=True,
        out_dict = None, base_prefix=[], offset=0
):
    """Copy the leaves of a nested dict into ``out_dict`` under joined keys.

    The function walks ``base_dict`` recursively. While ``offset`` is below
    ``max_depth`` each nesting level gets its own sub-dict in the output; from
    ``max_depth`` on, no further sub-dicts are opened and the remaining keys
    are concatenated with ``key_join`` instead.
    NOTE(review): the intent appears to be limiting the tree to ``max_depth``
    levels of nesting - confirm against the expected output.

    Args:
        base_dict: nested dict to walk; dict values are recursed into, all
            other values are treated as leaves.
        max_depth: level from which keys are joined rather than nested;
            None disables the limit.
        item_func: optional callable invoked for every leaf as
            ``item_func(key_path, target_dict, stored_key, value)``, where
            ``key_path`` is the list of all keys from the root of the walk
            down to and including the leaf key.
        key_join: separator used when joining keys.
        exclude_bottom_level: if true and there is a pending key prefix, the
            leaf is stored under its own key inside a sub-dict named after
            the joined prefix; otherwise it is stored under the joined
            prefix + leaf key directly in ``out_dict``.
        out_dict: dict to write into; a new OrderedDict is created when None.
            Recursive calls pass the dict of the current branch.
        base_prefix: list of keys leading to ``base_dict``. The list is only
            sliced and copied here, never mutated.
        offset: current nesting level.

    Returns:
        ``out_dict``, i.e. the dict this call wrote into.
    """
    
    if out_dict is None:
        out_dict = collections.OrderedDict()
        if len(base_prefix) > 0:
            # Outermost call with an initial prefix: open a sub-dict for it
            # and shift the depth bookkeeping accordingly.
            # NOTE(review): out_dict is rebound to the sub-dict, so the
            # enclosing dict created above is not what gets returned.
            # NOTE(review): `max_depth += ...` raises TypeError when max_depth
            # is None (its default).
            bk_str = key_join.join(base_prefix)
            if bk_str not in out_dict:
                out_dict[bk_str] = collections.OrderedDict()
            out_dict = out_dict[bk_str]
            offset += len(base_prefix)
            max_depth += len(base_prefix)
    
    # NOTE(review): param_offset is not read anywhere below.
    param_offset = offset
    
    # Past max_depth no new nesting level is opened and offset stays clamped.
    is_new_branch = True
    if max_depth is not None:
        if offset >= max_depth:
            is_new_branch = False
            offset = max_depth
    
    # Part of the prefix that still has to be expressed in the output keys at
    # this level (the keys before index offset-1 are dropped).
    if len(base_prefix) > 0 and offset > 0:
        f_base_prefix = base_prefix[offset-1:]
    else:
        f_base_prefix = base_prefix

#     print('> is_new_branch =', is_new_branch, ' offset =', offset, ' max_depth =', max_depth, ' base_prefix =', base_prefix, ' f_base_prefix =', f_base_prefix)

        
    # Dict handed to recursive calls: a sub-dict named after the pending
    # prefix when a new level is opened, otherwise out_dict itself.
    if is_new_branch and len(f_base_prefix) > 0:
        bk_str = key_join.join(f_base_prefix)
        if bk_str not in out_dict:
            out_dict[bk_str] = collections.OrderedDict()
        n_out_dict = out_dict[bk_str]
    else:
        n_out_dict = out_dict
    
    for k,v in base_dict.items():
        
        # Key parts used when a leaf is stored directly in out_dict.
        n_base_prefix = list(f_base_prefix) if not is_new_branch else list()
    #         n_base_prefix = list(base_prefix) if not is_new_branch else list()
        n_base_prefix.append(k)
        
        # Full, untruncated key path; forwarded to recursion and item_func.
        r_base_prefix = list(base_prefix)
        r_base_prefix.append(k)
        
#         print(' '*offset, n_base_prefix)
        
        if isinstance(v, dict):
            
#             print(' '*offset, ' - dict')
#             print(' '*offset, ' - new branch' if is_new_branch else ' - same branch', ' k =', k, ' base_prefix =', base_prefix, ', (forwarded) r_base_prefix =', r_base_prefix)
            
            # The recursive call returns the dict it was given, so n_out_dict
            # keeps referring to the same object.
            n_out_dict = transform_dict_depths_recursively(
                v, 
                max_depth, 
                item_func, key_join, 
                exclude_bottom_level,
                n_out_dict, r_base_prefix, offset+1)

            # adding to dict at this level, prefixed by k
        else:
            if exclude_bottom_level and len(f_base_prefix) > 0:
                # Leaf goes into the sub-dict of the joined prefix, under its
                # own key.
                bk_str = key_join.join(f_base_prefix)
                if bk_str not in out_dict:
                    out_dict[bk_str] = collections.OrderedDict()
                n_out_dict = out_dict[bk_str]
                k_str = k
                
#                 print(' '*offset, bk_str, '(bk_str)')
#                 print(' '*offset, ' - val')
#                 print(' '*(offset+1), k)
            else:

                # Leaf goes directly into out_dict under the joined key.
                k_str = key_join.join(n_base_prefix)
                n_out_dict = out_dict

#                 print(' '*offset, k_str, '(k_str, is_new_branch)')
#                 print(' '*offset, ' - val')

            # it could be potentially flatten here but 
            # base_prefix, k, 
            
            n_out_dict[k_str] = v
            
            # Let the caller observe every stored leaf (used in this notebook
            # to build a flat list of files).
            if callable(item_func):
                item_func(r_base_prefix, n_out_dict, k_str, v)
    
    return out_dict

## All events compacted

In [18]:
num_events_by_dirname_by_suffix_dict_compacted = remove_single_item_dicts(num_events_by_dirname_by_suffix_dict)

In [19]:
print_dicts_recursively(num_events_by_dirname_by_suffix_dict_compacted)

/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS:                                                                            2257280
    l1_laser:                                                                                                               105984
        pass_1:                                                                                                                 42496
            /mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-103619-001.001--l1_laser_pass1.root            None
            /mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-103644-001.001--l1_laser_pass1.root            42496
        energy_sweep:                                                                                                           46080
            /mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-104412-001.001--l1_laser_energy_sweep.root            46080
        perp_sweep: 

## Files - acquisition groups

In [20]:
run_re = re.compile(r'allpackets-\w+-\w+-\d+-(\d+)-\d+\.\d+--.*\.root')
k_str = '290916/gls_laser/allpackets-SPBEUSO-ACQUISITION-20160928-114754-001.001--laser_4hz_2.2mJ_45degalt_2degaz.root'
m = run_re.match(os.path.basename(k_str))
m.group(1)

'114754'

In [21]:
files_acq_groups_list = []
key_join = '_'
run_re = re.compile(r'allpackets-\w+-\w+-\d+-(\d+)-\d+\.\d+--.*\.root')

def flatten_3_acq_groups(r_base_prefix, n_out_dict, k_str, v, files_acq_groups_list=files_acq_groups_list, run_re=run_re):
    """Item callback that records one flat row per file.

    Appends to ``files_acq_groups_list`` the tuple
    ``(group level 0, group level 1, group level 2, file pathname, entries, run)``
    and prints the same row as a list.

    Args:
        r_base_prefix: list of keys leading to the file, file key included.
        n_out_dict: dict the leaf was stored in (not used here).
        k_str: key the leaf was stored under; used as the file pathname.
        v: entry count of the file; None is recorded as 0.
        files_acq_groups_list: list receiving the rows.
        run_re: compiled pattern whose first group is taken as the run id
            from the file's base name.
    """
    depth = len(r_base_prefix)

    group_l0 = remove_prefix(r_base_prefix[0], original_utah_files_dir)
    # The last element is the file itself, so intermediate groups exist only
    # for paths longer than two (level 1) or three (level 2) elements.
    group_l1 = r_base_prefix[1] if depth > 2 else None
    group_l2 = key_join.join(r_base_prefix[2:-1]) if depth > 3 else None

    rel_pathname = remove_prefix(k_str, original_utah_files_dir)
    num_entries = 0 if v is None else v

    run_match = run_re.match(os.path.basename(k_str))
    run_id = run_match.group(1) if run_match else None

    row = (group_l0, group_l1, group_l2, rel_pathname, num_entries, run_id)
    print(list(row))
    files_acq_groups_list.append(row)

num_events_by_dirname_by_suffix_dict_2_levels = transform_dict_depths_recursively(num_events_by_dirname_by_suffix_dict_compacted, 3, item_func=flatten_3_acq_groups) #

['011016/GLS', 'l1_laser', 'pass_1', '011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-103619-001.001--l1_laser_pass1.root', 0, '103619']
['011016/GLS', 'l1_laser', 'pass_1', '011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-103644-001.001--l1_laser_pass1.root', 42496, '103644']
['011016/GLS', 'l1_laser', 'energy_sweep', '011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-104412-001.001--l1_laser_energy_sweep.root', 46080, '104412']
['011016/GLS', 'l1_laser', 'perp_sweep', '011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-104904-001.001--l1_laser_perp_sweep.root', 0, '104904']
['011016/GLS', 'l1_laser', 'parralel_sweep', '011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-104912-001.001--l1_laser_parralel_sweep.root', 17408, '104912']
['011016/GLS', 'perSweep', '6_mJ', '011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-113501-001.001--perSweep6mJ.root', 17280, '113501']
['011016/GLS', 'perSweep', '1_5mJ', '011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-114202-001.001--perSwe

In [22]:
print_dicts_recursively(num_events_by_dirname_by_suffix_dict_2_levels)

/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS:                                                                            2257280
    l1_laser:                                                                                                               105984
        pass_1:                                                                                                                 42496
            /mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-103619-001.001--l1_laser_pass1.root            None
            /mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-103644-001.001--l1_laser_pass1.root            42496
        energy_sweep:                                                                                                           46080
            /mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-104412-001.001--l1_laser_energy_sweep.root            46080
        perp_sweep: 

In [23]:
files_acq_groups_list[0]

('011016/GLS',
 'l1_laser',
 'pass_1',
 '011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-103619-001.001--l1_laser_pass1.root',
 0,
 '103619')

In [24]:
files_acq_groups_list[1]

('011016/GLS',
 'l1_laser',
 'pass_1',
 '011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-103644-001.001--l1_laser_pass1.root',
 42496,
 '103644')

In [25]:
files_acq_groups_df = \
    pd.DataFrame(files_acq_groups_list, 
                 columns=['acq_group_l0', 'acq_group_l1', 'acq_group_l2', 'file_pathname', 'entries', 'run'])

In [26]:
files_acq_groups_df

Unnamed: 0,acq_group_l0,acq_group_l1,acq_group_l2,file_pathname,entries,run
0,011016/GLS,l1_laser,pass_1,011016/GLS/allpackets-SPBEUSO-ACQUISITION-2016...,0,103619
1,011016/GLS,l1_laser,pass_1,011016/GLS/allpackets-SPBEUSO-ACQUISITION-2016...,42496,103644
2,011016/GLS,l1_laser,energy_sweep,011016/GLS/allpackets-SPBEUSO-ACQUISITION-2016...,46080,104412
3,011016/GLS,l1_laser,perp_sweep,011016/GLS/allpackets-SPBEUSO-ACQUISITION-2016...,0,104904
4,011016/GLS,l1_laser,parralel_sweep,011016/GLS/allpackets-SPBEUSO-ACQUISITION-2016...,17408,104912
5,011016/GLS,perSweep,6_mJ,011016/GLS/allpackets-SPBEUSO-ACQUISITION-2016...,17280,113501
6,011016/GLS,perSweep,1_5mJ,011016/GLS/allpackets-SPBEUSO-ACQUISITION-2016...,59136,114202
7,011016/GLS,perSweep,800_muJ,011016/GLS/allpackets-SPBEUSO-ACQUISITION-2016...,9600,114849
8,011016/GLS,45%Att,,011016/GLS/allpackets-SPBEUSO-ACQUISITION-2016...,11008,120433
9,011016/GLS,45%Att,,011016/GLS/allpackets-SPBEUSO-ACQUISITION-2016...,14592,120606


In [27]:
files_acq_groups_df.to_csv(os.path.join(data_snippets_dir, 'files_acq_groups.tsv'), sep='\t')

## Eusospb-analisi

In [235]:
eusospb_analisi_pathname = os.path.join(data_snippets_dir, 'eusospb-analisi.turin.txt')

In [123]:
!cat utah_events_directory_analysis/eusospb-analisi.turin.txt

200161004   2 lens config. TA-EUSO Tr.     -10% thresh.     EUSO-Bal.(P=1,R=1) -10% thresh  -20%
RUN	per mJ   pack  	trEC2  EC5   EC8  EC2  EC5  EC8     EC2  EC5  EC8   EC2  EC5  EC8
033506  55  3.0  109    98     98     98   99   99   99      98   98   98    99   99   99
033605  54  2.8  111    99     99     98  100  100  100      99   99   97   100  100   99 
033719  53  2.6  113    98     98     98  100  100  100      99   99   97   100  100   99
033821  52  2.4  110    99     99     98  100  100   98      99   99   96   100  100   96
033935  51  2.2  121    99     99     97   99   99   98      99   99   93    99   99   96
034204  50  2.0  119    99     99     93   99   99   95      99   99   88    99   99   94
034320  49  1.9  114    97     97     86   98   98   94      97   97   84    98   98   91
034423  48  1.8  113    98     98     72   99   99   87      98   98   60    99   99   80
034919  47  1.7  127   100    100     26  100  100   51     100  100   14   100  100  

In [60]:
# Run ids (as strings, exactly as written in the table) per lens configuration.
eusospb_analisi_2_lens__runs = []
eusospb_analisi_3_lens__runs = []

# A data row starts with: run number, integer, number with optional decimals.
table_entry_simple_re = re.compile(r'^(\d+)\s+(\d+)\s+(\d+(\.\d+)?)')

# Column names in the order the whitespace-separated values appear in a row.
eusospb_analisi_table_column_names = [
    'run', 'per', 'mj', 'pack',
    'ta_euso_ec2', 'ta_euso_ec5', 'ta_euso_ec8',
    'ta_euso_10_ec2', 'ta_euso_10_ec5', 'ta_euso_10_ec8',
    'euso_bal_ec2', 'euso_bal_ec5', 'euso_bal_ec8',
    'euso_bal_10_ec2', 'euso_bal_10_ec5', 'euso_bal_10_ec8',
    'euso_bal_20_ec2', 'euso_bal_20_ec5', 'euso_bal_20_ec8'
]
# Columns that are not parsed as int.
eusospb_analisi_table_column_types = {
    'mj': float
}
# One list of values per column, plus the lens configuration of every row.
eusospb_analisi_table_column_data = [[] for _ in eusospb_analisi_table_column_names]
eusospb_analisi_table_num_lens_column_data = []

num_lens = -1

with open(eusospb_analisi_pathname, 'r') as table_file:
    cur_list = None
    for table_line in table_file:
        lens_header = re.search(r'(\d) lens config', table_line)
        if lens_header:
            # Header of a new section: following rows belong to this config.
            num_lens = int(lens_header.group(1))
            cur_list = eusospb_analisi_2_lens__runs if num_lens == 2 else eusospb_analisi_3_lens__runs
            continue

        row_match = table_entry_simple_re.search(table_line)
        if not row_match:
            continue

        cur_list.append(row_match.group(1))

        row_fields = table_line.split()
        for col_idx, col_name in enumerate(eusospb_analisi_table_column_names):
            if col_idx < len(row_fields):
                if col_name in eusospb_analisi_table_column_types:
                    cell = eusospb_analisi_table_column_types[col_name](row_fields[col_idx])
                else:
                    cell = int(row_fields[col_idx])
            else:
                # Rows may have fewer values than there are columns.
                cell = None
            eusospb_analisi_table_column_data[col_idx].append(cell)
        eusospb_analisi_table_num_lens_column_data.append(num_lens)
                    
                    
#                     eusospb_analisi_table_column_data[i].append(
#                         (eusospb_analisi_table_column_types[col](line_vals[i]) \
#                             if eusospb_analisi_table_column_names[i] in eusospb_analisi_table_column_types else \
#                         int(line_vals[i]) ) \
#                             if len(line_vals) > i else None
#                     )
                    

In [100]:
eusospb_analisi_df = \
    pd.DataFrame(list(zip(eusospb_analisi_table_num_lens_column_data, *eusospb_analisi_table_column_data)), 
                 columns=['num_lens'] + eusospb_analisi_table_column_names)

In [101]:
eusospb_analisi_df

Unnamed: 0,num_lens,run,per,mj,pack,ta_euso_ec2,ta_euso_ec5,ta_euso_ec8,ta_euso_10_ec2,ta_euso_10_ec5,ta_euso_10_ec8,euso_bal_ec2,euso_bal_ec5,euso_bal_ec8,euso_bal_10_ec2,euso_bal_10_ec5,euso_bal_10_ec8,euso_bal_20_ec2,euso_bal_20_ec5,euso_bal_20_ec8
0,2,33506,55,3.0,109,98,98,98,99,99,99,98,98,98,99,99,99,,,
1,2,33605,54,2.8,111,99,99,98,100,100,100,99,99,97,100,100,99,,,
2,2,33719,53,2.6,113,98,98,98,100,100,100,99,99,97,100,100,99,,,
3,2,33821,52,2.4,110,99,99,98,100,100,98,99,99,96,100,100,96,,,
4,2,33935,51,2.2,121,99,99,97,99,99,98,99,99,93,99,99,96,,,
5,2,34204,50,2.0,119,99,99,93,99,99,95,99,99,88,99,99,94,,,
6,2,34320,49,1.9,114,97,97,86,98,98,94,97,97,84,98,98,91,,,
7,2,34423,48,1.8,113,98,98,72,99,99,87,98,98,60,99,99,80,,,
8,2,34919,47,1.7,127,100,100,26,100,100,51,100,100,14,100,100,32,,,
9,2,35045,46,1.6,113,100,98,56,100,99,73,100,98,53,100,98,60,,,


In [108]:
# Group keys such as '55_per' or '45_%'; the number is the 'per' value.
per_num_re = re.compile(r'^(\d+)_(per|%)$')
# First group: the run id inside the acquisition file name.
run_re = re.compile(r'allpackets-\w+-\w+-\d+-(\d+)-\d+\.\d+--[^.]+\.root')
# Divisor turning a file's entry count into the table's 'pack' column.
packet_size = 128

# Rows of (num_lens, run, per, pack, acq_group, file_pathname), one per file
# whose path contains a run number listed in the eusospb-analisi table.
entries = []

for num_lens in np.unique(eusospb_analisi_df['num_lens']):

    # Run numbers were parsed as int, so leading zeros are gone; filter_tree
    # matches them as substrings of the keys.
    lens_runs = eusospb_analisi_df.loc[eusospb_analisi_df['num_lens'] == num_lens, 'run'].astype(str).tolist()
    t_filtered_dict, t_found = filter_tree(num_events_by_dirname_by_suffix_dict_compacted, lens_runs)
    reduced_t_filtered_dict = remove_single_item_dicts(t_filtered_dict)

    for acq_group, per_dict in reduced_t_filtered_dict.items():
        for per_str, files_dict in per_dict.items():
            m = per_num_re.search(per_str)
            if not m:
                continue
            per_val = int(m.group(1))
            for file_pathname, num_frames in files_dict.items():
                m = run_re.search(file_pathname)
                if not m:
                    print('Unexpected file_pathname:', file_pathname)
                    continue
                run_num = m.group(1)

                # Files without a count are stored as None; count them as 0
                # (as the other helpers in this notebook do) instead of
                # failing on the integer division.
                if num_frames is None:
                    num_frames = 0

                entries.append((num_lens, int(run_num), per_val, num_frames // packet_size, 
                                remove_prefix(acq_group, original_utah_files_dir),
                                remove_prefix(file_pathname, original_utah_files_dir)))

#                 break
#             break
#         break
#     break


In [110]:
file_entries_df = pd.DataFrame(entries, columns=['num_lens', 'run', 'per', 'pack', 'acq_group', 'file_pathname'])

In [111]:
file_entries_df

Unnamed: 0,num_lens,run,per,pack,acq_group,file_pathname
0,2,33506,55,109,041016/GLS_45degaway,041016/GLS/allpackets-SPBEUSO-ACQUISITION-2016...
1,2,33605,54,111,041016/GLS_45degaway,041016/GLS/allpackets-SPBEUSO-ACQUISITION-2016...
2,2,33719,53,113,041016/GLS_45degaway,041016/GLS/allpackets-SPBEUSO-ACQUISITION-2016...
3,2,33821,52,110,041016/GLS_45degaway,041016/GLS/allpackets-SPBEUSO-ACQUISITION-2016...
4,2,33935,51,121,041016/GLS_45degaway,041016/GLS/allpackets-SPBEUSO-ACQUISITION-2016...
5,2,34204,50,119,041016/GLS_45degaway,041016/GLS/allpackets-SPBEUSO-ACQUISITION-2016...
6,2,34320,49,114,041016/GLS_45degaway,041016/GLS/allpackets-SPBEUSO-ACQUISITION-2016...
7,2,34423,48,113,041016/GLS_45degaway,041016/GLS/allpackets-SPBEUSO-ACQUISITION-2016...
8,2,34919,47,127,041016/GLS_45degaway,041016/GLS/allpackets-SPBEUSO-ACQUISITION-2016...
9,2,35045,46,113,041016/GLS_45degaway,041016/GLS/allpackets-SPBEUSO-ACQUISITION-2016...


In [128]:
eusospb_analisi_with_pathnames_left_df = \
    pd.merge(eusospb_analisi_df, file_entries_df, how='left', on=['num_lens', 'run', 'per', 'pack'])

eusospb_analisi_with_pathnames_left_df

Unnamed: 0,num_lens,run,per,mj,pack,ta_euso_ec2,ta_euso_ec5,ta_euso_ec8,ta_euso_10_ec2,ta_euso_10_ec5,...,euso_bal_ec5,euso_bal_ec8,euso_bal_10_ec2,euso_bal_10_ec5,euso_bal_10_ec8,euso_bal_20_ec2,euso_bal_20_ec5,euso_bal_20_ec8,acq_group,file_pathname
0,2,33506,55,3.0,109,98,98,98,99,99,...,98,98,99,99,99,,,,041016/GLS_45degaway,041016/GLS/allpackets-SPBEUSO-ACQUISITION-2016...
1,2,33506,55,3.0,109,98,98,98,99,99,...,98,98,99,99,99,,,,300916/GLS_45degaway,300916/GLS/allpackets-SPBEUSO-ACQUISITION-2016...
2,2,33605,54,2.8,111,99,99,98,100,100,...,99,97,100,100,99,,,,041016/GLS_45degaway,041016/GLS/allpackets-SPBEUSO-ACQUISITION-2016...
3,2,33605,54,2.8,111,99,99,98,100,100,...,99,97,100,100,99,,,,300916/GLS_45degaway,300916/GLS/allpackets-SPBEUSO-ACQUISITION-2016...
4,2,33719,53,2.6,113,98,98,98,100,100,...,99,97,100,100,99,,,,041016/GLS_45degaway,041016/GLS/allpackets-SPBEUSO-ACQUISITION-2016...
5,2,33719,53,2.6,113,98,98,98,100,100,...,99,97,100,100,99,,,,300916/GLS_45degaway,300916/GLS/allpackets-SPBEUSO-ACQUISITION-2016...
6,2,33821,52,2.4,110,99,99,98,100,100,...,99,96,100,100,96,,,,041016/GLS_45degaway,041016/GLS/allpackets-SPBEUSO-ACQUISITION-2016...
7,2,33821,52,2.4,110,99,99,98,100,100,...,99,96,100,100,96,,,,300916/GLS_45degaway,300916/GLS/allpackets-SPBEUSO-ACQUISITION-2016...
8,2,33935,51,2.2,121,99,99,97,99,99,...,99,93,99,99,96,,,,041016/GLS_45degaway,041016/GLS/allpackets-SPBEUSO-ACQUISITION-2016...
9,2,33935,51,2.2,121,99,99,97,99,99,...,99,93,99,99,96,,,,300916/GLS_45degaway,300916/GLS/allpackets-SPBEUSO-ACQUISITION-2016...


In [126]:
pd.merge(eusospb_analisi_df, file_entries_df, how='right', on=['num_lens', 'run', 'per', 'pack'])

Unnamed: 0,num_lens,run,per,mj,pack,ta_euso_ec2,ta_euso_ec5,ta_euso_ec8,ta_euso_10_ec2,ta_euso_10_ec5,...,euso_bal_ec5,euso_bal_ec8,euso_bal_10_ec2,euso_bal_10_ec5,euso_bal_10_ec8,euso_bal_20_ec2,euso_bal_20_ec5,euso_bal_20_ec8,acq_group,file_pathname
0,2,33506,55,3.0,109,98,98,98,99,99,...,98,98,99,99,99,,,,041016/GLS_45degaway,041016/GLS/allpackets-SPBEUSO-ACQUISITION-2016...
1,2,33506,55,3.0,109,98,98,98,99,99,...,98,98,99,99,99,,,,300916/GLS_45degaway,300916/GLS/allpackets-SPBEUSO-ACQUISITION-2016...
2,2,33605,54,2.8,111,99,99,98,100,100,...,99,97,100,100,99,,,,041016/GLS_45degaway,041016/GLS/allpackets-SPBEUSO-ACQUISITION-2016...
3,2,33605,54,2.8,111,99,99,98,100,100,...,99,97,100,100,99,,,,300916/GLS_45degaway,300916/GLS/allpackets-SPBEUSO-ACQUISITION-2016...
4,2,33719,53,2.6,113,98,98,98,100,100,...,99,97,100,100,99,,,,041016/GLS_45degaway,041016/GLS/allpackets-SPBEUSO-ACQUISITION-2016...
5,2,33719,53,2.6,113,98,98,98,100,100,...,99,97,100,100,99,,,,300916/GLS_45degaway,300916/GLS/allpackets-SPBEUSO-ACQUISITION-2016...
6,2,33821,52,2.4,110,99,99,98,100,100,...,99,96,100,100,96,,,,041016/GLS_45degaway,041016/GLS/allpackets-SPBEUSO-ACQUISITION-2016...
7,2,33821,52,2.4,110,99,99,98,100,100,...,99,96,100,100,96,,,,300916/GLS_45degaway,300916/GLS/allpackets-SPBEUSO-ACQUISITION-2016...
8,2,33935,51,2.2,121,99,99,97,99,99,...,99,93,99,99,96,,,,041016/GLS_45degaway,041016/GLS/allpackets-SPBEUSO-ACQUISITION-2016...
9,2,33935,51,2.2,121,99,99,97,99,99,...,99,93,99,99,96,,,,300916/GLS_45degaway,300916/GLS/allpackets-SPBEUSO-ACQUISITION-2016...


In [129]:
eusospb_analisi_with_pathnames_left_df.to_csv(os.path.join(data_snippets_dir, 'eusospb_analisi_with_pathnames_left.tsv'), sep='\t')

In [130]:
pd.merge(eusospb_analisi_df, file_entries_df, how='outer', on=['num_lens', 'run', 'per', 'pack'])

Unnamed: 0,num_lens,run,per,mj,pack,ta_euso_ec2,ta_euso_ec5,ta_euso_ec8,ta_euso_10_ec2,ta_euso_10_ec5,...,euso_bal_ec5,euso_bal_ec8,euso_bal_10_ec2,euso_bal_10_ec5,euso_bal_10_ec8,euso_bal_20_ec2,euso_bal_20_ec5,euso_bal_20_ec8,acq_group,file_pathname
0,2,33506,55,3.0,109,98,98,98,99,99,...,98,98,99,99,99,,,,041016/GLS_45degaway,041016/GLS/allpackets-SPBEUSO-ACQUISITION-2016...
1,2,33506,55,3.0,109,98,98,98,99,99,...,98,98,99,99,99,,,,300916/GLS_45degaway,300916/GLS/allpackets-SPBEUSO-ACQUISITION-2016...
2,2,33605,54,2.8,111,99,99,98,100,100,...,99,97,100,100,99,,,,041016/GLS_45degaway,041016/GLS/allpackets-SPBEUSO-ACQUISITION-2016...
3,2,33605,54,2.8,111,99,99,98,100,100,...,99,97,100,100,99,,,,300916/GLS_45degaway,300916/GLS/allpackets-SPBEUSO-ACQUISITION-2016...
4,2,33719,53,2.6,113,98,98,98,100,100,...,99,97,100,100,99,,,,041016/GLS_45degaway,041016/GLS/allpackets-SPBEUSO-ACQUISITION-2016...
5,2,33719,53,2.6,113,98,98,98,100,100,...,99,97,100,100,99,,,,300916/GLS_45degaway,300916/GLS/allpackets-SPBEUSO-ACQUISITION-2016...
6,2,33821,52,2.4,110,99,99,98,100,100,...,99,96,100,100,96,,,,041016/GLS_45degaway,041016/GLS/allpackets-SPBEUSO-ACQUISITION-2016...
7,2,33821,52,2.4,110,99,99,98,100,100,...,99,96,100,100,96,,,,300916/GLS_45degaway,300916/GLS/allpackets-SPBEUSO-ACQUISITION-2016...
8,2,33935,51,2.2,121,99,99,97,99,99,...,99,93,99,99,96,,,,041016/GLS_45degaway,041016/GLS/allpackets-SPBEUSO-ACQUISITION-2016...
9,2,33935,51,2.2,121,99,99,97,99,99,...,99,93,99,99,96,,,,300916/GLS_45degaway,300916/GLS/allpackets-SPBEUSO-ACQUISITION-2016...


In [81]:
# For each lens configuration, print the tree of files whose keys contain one
# of the run ids listed in the eusospb-analisi table.
for runs_label, runs_list in [('2 lens',eusospb_analisi_2_lens__runs), ('3 lens', eusospb_analisi_3_lens__runs)]:
    print(runs_label)
    print('-'*150)
    # runs_list holds the run ids as strings; filter_tree matches them as
    # substrings of the tree keys.
    t_filtered_dict, t_found = filter_tree(num_events_by_dirname_by_suffix_dict_compacted, runs_list)
    
    # Counts are divided by 128 before printing - presumably the packet size
    # (see `packet_size`), i.e. entries -> packets; TODO confirm.
    # NOTE(review): remove_single_item_dicts returns a tuple instead of a dict
    # when the filtered tree has a single top-level key whose value is a dict.
    print_dicts_recursively(remove_single_item_dicts(t_filtered_dict), original_utah_files_dir, translate_func=lambda v: v/128)
    
#     break
    
#     print('--------------------')
#     for run_id in runs_list:
#         t_filtered_dict, t_found = filter_tree(num_events_by_dirname_by_suffix_dict_compacted, run_id)
#         print_dicts_recursively(t_filtered_dict, original_utah_files_dir, max_depth=None)
#         print()

    print('='*150)
    print()

2 lens
------------------------------------------------------------------------------------------------------------------------------------------------------
041016/GLS_45degaway:                                                                                                   2102.0
    55_per:                                                                                                                 109.0
        041016/GLS/allpackets-SPBEUSO-ACQUISITION-20161004-033506-001.001--45degaway55per.root                                  109.0
    54_per:                                                                                                                 111.0
        041016/GLS/allpackets-SPBEUSO-ACQUISITION-20161004-033605-001.001--45degaway54per.root                                  111.0
    53_per:                                                                                                                 113.0
        041016/GLS/allpackets-SPBEUSO-ACQUISITION-2016100

In [None]:

    reduced_t_filtered_dict = remove_single_item_dicts(t_filtered_dict)
    
    print(t_filtered_dict.keys())
    print(reduced_t_filtered_dict.keys())
    print(remove_single_item_dicts(reduced_t_filtered_dict['/mnt/data_sgbc1/SPBDATA/SPB_Utah/041016/GLS_45degaway']).keys())
    
#     print(t_filtered_dict['/mnt/data_sgbc1/SPBDATA/SPB_Utah/041016/GLS'].keys())

In [73]:
print_dicts_recursively(num_events_by_dirname_by_suffix_dict_compacted, original_utah_files_dir, max_depth=0, translate_func=lambda v: v/128)

011016/GLS:                                                                                                             17635.0
011016:                                                                                                                 14306.0
011016/dac7_linearity_gain:                                                                                             2015.0
011016/s-curves:                                                                                                        1722.0
011016/trig_tests:                                                                                                      235.0
021016/GLS:                                                                                                             16115.0
021016/LED:                                                                                                             7786.0
021016/SCURVEforLECH:                                                                                        