In [17]:
import ROOT
import os
import sys
import glob
import collections
import re
import numpy as np

In [3]:
# Directory for files produced by this notebook (e.g. the cached entry-count
# table); it is created if it does not exist yet.
data_snippets_dir = 'utah_events_directory_analysis'
os.makedirs(data_snippets_dir, exist_ok=True)

In [60]:
# utah_files_glob = '/home/spbproc/SPBDATA_utah/*'
# Top-level directory that is searched for .root files.
original_utah_files_dir = '/mnt/data_sgbc1/SPBDATA/SPB_Utah'
# Glob pattern selecting the entries of that directory whose names start with a digit.
original_utah_files_dirs = original_utah_files_dir + '/[0-9]*'

In [6]:
# l = [f for f in glob.glob(original_utah_files_dir+'/[0-9]*/*.root', recursive=True)]
# l = [os.path.join(dp, f) for dp, dn, fn in os.walk(original_utah_files_dir) for f in fn if f.endswith('.root')]

# Collect the path of every *.root file found anywhere below the directories
# matched by the glob pattern; matches that are not directories are ignored.
root_files = []

for candidate_dir in glob.glob(original_utah_files_dirs):
    if os.path.isdir(candidate_dir):
        for walk_root, _subdirs, filenames in os.walk(candidate_dir):
            for filename in filenames:
                if filename.endswith('.root'):
                    root_files.append(os.path.join(walk_root, filename))

In [7]:
# Number of .root files that were found.
print(len(root_files))

988


In [8]:
# List all discovered .root files in sorted order.
for p in sorted(root_files):
    print(p)

/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-103619-001.001--l1_laser_pass1.root
/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-103644-001.001--l1_laser_pass1.root
/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-104412-001.001--l1_laser_energy_sweep.root
/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-104904-001.001--l1_laser_perp_sweep.root
/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-104912-001.001--l1_laser_parralel_sweep.root
/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-113501-001.001--perSweep6mJ.root
/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-114202-001.001--perSweep1_5mJ.root
/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-114849-001.001--perSweep800muJ.root
/mnt/data_sgbc1/SPBDATA

In [9]:
# [[(w1, distance.levenshtein(w1,w2)) for w1 in root_files] for w2 in [root_files[0]]]

In [10]:
# Location of the TSV file used to cache the per-file entry counts.
num_events_counts_file_pathname = os.path.join(data_snippets_dir, 'num_events_dict.tsv')
print(num_events_counts_file_pathname)

utah_events_directory_analysis/num_events_dict.tsv


In [12]:
# Map every .root file path to the number of entries of its 'tevent' tree.
# The value is None when the file cannot be opened, is a zombie, or has no
# 'tevent' object. Because opening every file is slow, the result is cached
# as a TSV table ("<path>\t<count or None>" per line) and reloaded from there
# whenever that table already exists.
num_events_dict = collections.OrderedDict()
if not os.path.exists(num_events_counts_file_pathname):
    sorted_root_files = sorted(root_files)
    for i, root_file_path in enumerate(sorted_root_files):
        if i % 10 == 0:
            # Progress report every tenth file.
            print('{}/{}'.format(i+1, len(sorted_root_files)))
            sys.stdout.flush()
        root_file = ROOT.TFile.Open(root_file_path)
        num_entries = None
        if root_file:
            try:
                if not root_file.IsZombie():
                    tree = root_file.Get('tevent')
                    if tree:
                        num_entries = tree.GetEntries()
            finally:
                # Always release the handle, also for zombie files or when
                # reading the tree raised.
                root_file.Close()
            del root_file
        num_events_dict[root_file_path] = num_entries

    # Write to a temporary file first and rename it afterwards, so that an
    # interrupted run never leaves a truncated table that would be loaded
    # silently on the next execution.
    tmp_counts_file_pathname = num_events_counts_file_pathname + '.tmp'
    with open(tmp_counts_file_pathname, 'w') as counts_file:
        for path, count in num_events_dict.items():
            print('{}\t{}'.format(path, str(count)), file=counts_file)
    os.replace(tmp_counts_file_pathname, num_events_counts_file_pathname)

else:
    print('Loading existing table:', num_events_counts_file_pathname)
    with open(num_events_counts_file_pathname, 'r') as counts_file:
        for line_number, line in enumerate(counts_file, start=1):
            # Strip only the line terminator; the last line may lack one.
            row = line.rstrip('\r\n')
            if not row:
                continue
            # The count is the last column, so split from the right to stay
            # correct even if a path contains a tab character.
            path, separator, count_str = row.rpartition('\t')
            if not separator:
                raise ValueError('Malformed line {} in {}: {!r}'.format(
                    line_number, num_events_counts_file_pathname, row))
            num_events_dict[path] = int(count_str) if count_str != 'None' else None

Loading existing table: utah_events_directory_analysis/num_events_dict.tsv


In [14]:
# Display the mapping of file path -> entry count (None where no count is available).
num_events_dict

OrderedDict([('/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-103619-001.001--l1_laser_pass1.root',
              None),
             ('/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-103644-001.001--l1_laser_pass1.root',
              42496),
             ('/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-104412-001.001--l1_laser_energy_sweep.root',
              46080),
             ('/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-104904-001.001--l1_laser_perp_sweep.root',
              None),
             ('/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-104912-001.001--l1_laser_parralel_sweep.root',
              17408),
             ('/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-113501-001.001--perSweep6mJ.root',
              17280),
             ('/mnt/data_sgbc1

In [18]:
# Count the files for which an entry count is available (value is not None).
np.sum([1 for v in num_events_dict.values() if v is not None])

795

In [19]:
# Group the per-file entry counts by the directory that contains each file:
# directory -> (file path -> entry count), preserving the original file order.
num_events_by_dirname_dict = collections.OrderedDict()
for file_path, entry_count in num_events_dict.items():
    files_in_dir = num_events_by_dirname_dict.setdefault(
        os.path.dirname(file_path), collections.OrderedDict())
    files_in_dir[file_path] = entry_count

In [20]:
# Print each directory followed by its files and their entry counts (tab-indented).
for d, sub_dict in num_events_by_dirname_dict.items():
    print(d)
    for p, c in sub_dict.items():
        print('\t{}\t{}'.format(p,c))

/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS
	/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-103619-001.001--l1_laser_pass1.root	None
	/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-103644-001.001--l1_laser_pass1.root	42496
	/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-104412-001.001--l1_laser_energy_sweep.root	46080
	/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-104904-001.001--l1_laser_perp_sweep.root	None
	/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-104912-001.001--l1_laser_parralel_sweep.root	17408
	/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-113501-001.001--perSweep6mJ.root	17280
	/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-114202-001.001--perSweep1_5mJ.root	59136
	/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS/allpack

In [37]:
# Build a nested dictionary
#     directory -> suffix token -> ... -> suffix token -> file path -> entry count
# where the tokens are derived from the part of the file name between '--'
# and '.root' (e.g. 'l1_laser_pass1' -> 'l1', 'laser', 'pass', '1').
num_events_by_dirname_by_suffix_dict = collections.OrderedDict()

for p, c in num_events_dict.items():

    d = os.path.dirname(p)
    if d not in num_events_by_dirname_by_suffix_dict:
        num_events_by_dirname_by_suffix_dict[d] = collections.OrderedDict()

    # Search the file name only: applied to the full path, a '--' inside any
    # directory name would drag path components into the suffix.
    suffix_matches = re.search(r'--(.*)\.root$', os.path.basename(p))

    suffix = None

    if suffix_matches:
        suffix = suffix_matches.group(1)

    groups = []

    if suffix:

        for suffix_part in suffix.split('_'):

            # Split a part such as 'perSweep6mJ' into name, number and the
            # optional remainder ('perSweep', '6', 'mJ'); parts that do not
            # have this shape are kept as a single token.
            subsuffix_matches = re.search(r'^(\d*[a-zA-Z_]{2,})(\d+)([^0-9].*)?$', suffix_part)

            if subsuffix_matches:
                groups.extend(g for g in subsuffix_matches.groups() if g is not None)
            else:
                groups.append(suffix_part)

    # Walk down (creating levels as needed) to the dictionary that belongs to
    # this token sequence and store the file there. Files without a suffix
    # are stored directly under their directory.
    base_dict = num_events_by_dirname_by_suffix_dict[d]

    for g in groups:
        if g not in base_dict:
            base_dict[g] = collections.OrderedDict()
        base_dict = base_dict[g]

    base_dict[p] = c

/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS
/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS
/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS
/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS
/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS
/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS
/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS
/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS
/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS
/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS
/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS
/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS
/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS
/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS
/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS
/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS
/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS
/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS
/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS
/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS
/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS
/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS
/mnt/data_sgbc1/SPBDATA/SPB_Utah

In [106]:
def count_vals_recursively(base_dict):
    """Return the sum of all leaf values in a nested dictionary.

    Values that are dictionaries are summed recursively; ``None`` leaves
    contribute zero. An empty dictionary yields ``0``.
    """
    def leaf_or_subtotal(value):
        if isinstance(value, dict):
            return count_vals_recursively(value)
        return 0 if value is None else value

    return sum(leaf_or_subtotal(value) for value in base_dict.values())
                

def print_dicts_recursively(base_dict, removed_prefix=None, offset=0, width=120, tab_str='    ', max_depth=None, count_for_nodes=True, offset_numbers=True):
    """Print a nested dictionary as an indented tree, one line per key.

    Parameters
    ----------
    base_dict : dict
        Dictionary to print; values that are dictionaries are printed as
        sub-trees, all other values as leaves.
    removed_prefix : str or None
        If given, keys starting with this prefix are printed without it and
        without the single character that follows it.
    offset : int
        Current nesting level; controls the indentation.
    width : int or None
        Width to which the key column is padded before the value is
        printed. With ``None`` key and value are concatenated directly.
    tab_str : str
        String repeated ``offset`` times to indent a line.
    max_depth : int or None
        Levels with ``offset > max_depth`` are not printed; ``None`` prints
        everything.
    count_for_nodes : bool
        Whether to print, next to each dictionary key, the sum of the leaf
        values below it (computed with ``count_vals_recursively``).
    offset_numbers : bool
        Whether the value column is indented by the nesting level as well.
    """
    if max_depth is not None and offset > max_depth:
        # Nothing at this level (or below) would be printed.
        return

    if width is None:
        line_format = '{}{}'
    else:
        line_format = '{:'+str(width)+'}' + (tab_str*offset if offset_numbers else '') + '{}'

    indent = tab_str*offset

    for k, v in base_dict.items():
        if removed_prefix is not None and k[:len(removed_prefix)] == removed_prefix:
            # The +1 also drops the character following the prefix.
            k = k[len(removed_prefix)+1:]

        if isinstance(v, dict):
            k_str = indent+k+':'
            if count_for_nodes:
                print(line_format.format(k_str, count_vals_recursively(v)))
            else:
                print(k_str)

            # Forward *all* formatting options; previously count_for_nodes
            # and offset_numbers were dropped here and fell back to their
            # defaults below the top level.
            print_dicts_recursively(v, removed_prefix, offset+1, width, tab_str, max_depth,
                                    count_for_nodes, offset_numbers)
        else:
            print(line_format.format(indent+k, str(v)))

In [56]:
def remove_single_item_dicts(base_dict, offset=0):
    """Collapse chains of single-entry dictionaries into one level.

    Whenever the value stored under a key is a dictionary with exactly one
    entry whose value is again a dictionary, the two keys are joined with
    ``'_'`` and the intermediate level is dropped, e.g.
    ``{'energy': {'sweep': {...}}, ...}`` becomes ``{'energy_sweep': {...}, ...}``.
    A single-entry dictionary whose value is not a dictionary is left alone.

    Parameters
    ----------
    base_dict : dict
        Nested dictionary to compact. It is not modified.
    offset : int
        Nesting level of ``base_dict``; only passed along to recursive calls.

    Returns
    -------
    The compacted dictionary (a new ``OrderedDict``). Non-dictionary input,
    an empty dictionary and a dictionary with one non-dictionary value are
    returned unchanged. A dictionary is always returned for dictionary
    input, including a top-level single-entry chain (which previously
    leaked a ``(key, value)`` tuple).

    Note: if a joined key equals another key of the same level, the entry
    stored later replaces the earlier one.
    """
    if not isinstance(base_dict, dict) or len(base_dict) == 0:
        return base_dict

    if len(base_dict) == 1 and not isinstance(next(iter(base_dict.values())), dict):
        return base_dict

    out_dict = collections.OrderedDict()
    for key, value in base_dict.items():
        # Follow the chain of single-entry dictionaries below this key,
        # accumulating the joined key as we go.
        while isinstance(value, dict) and len(value) == 1:
            inner_key, inner_value = next(iter(value.items()))
            if not isinstance(inner_value, dict):
                break
            key = key + '_' + inner_key
            value = inner_value
        # `value` is now a leaf or a dictionary that cannot be merged into
        # its parent any further; compact whatever lies below it.
        out_dict[key] = remove_single_item_dicts(value, offset+1)
    return out_dict

In [44]:
# Print the full directory / suffix-token tree.
print_dicts_recursively(num_events_by_dirname_by_suffix_dict)

/mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS:
  l1:
    laser:
      pass:
        1:
          /mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-103619-001.001--l1_laser_pass1.root: None
          /mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-103644-001.001--l1_laser_pass1.root: 42496
      energy:
        sweep:
          /mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-104412-001.001--l1_laser_energy_sweep.root: 46080
      perp:
        sweep:
          /mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-104904-001.001--l1_laser_perp_sweep.root: None
      parralel:
        sweep:
          /mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-104912-001.001--l1_laser_parralel_sweep.root: 17408
  perSweep:
    6:
      mJ:
        /mnt/data_sgbc1/SPBDATA/SPB_Utah/011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-113

    43:
      %:
        /mnt/data_sgbc1/SPBDATA/SPB_Utah/021016/GLS/allpackets-SPBEUSO-ACQUISITION-20161002-045437-001.001--vertical43%.root: 896
    60:
      %1Hz:
        /mnt/data_sgbc1/SPBDATA/SPB_Utah/021016/GLS/allpackets-SPBEUSO-ACQUISITION-20161002-053352-001.001--vertical60%1Hz.root: None
    70:
      %1Hz:
        /mnt/data_sgbc1/SPBDATA/SPB_Utah/021016/GLS/allpackets-SPBEUSO-ACQUISITION-20161002-053533-001.001--vertical70%1Hz.root: 6528
        /mnt/data_sgbc1/SPBDATA/SPB_Utah/021016/GLS/allpackets-SPBEUSO-ACQUISITION-20161002-054017-001.001--vertical70%1Hz.root: 16256
    800:
      muJ:
        /mnt/data_sgbc1/SPBDATA/SPB_Utah/021016/GLS/allpackets-SPBEUSO-ACQUISITION-20161002-082044-001.001--vertical800muJ.root: None
        /mnt/data_sgbc1/SPBDATA/SPB_Utah/021016/GLS/allpackets-SPBEUSO-ACQUISITION-20161002-082056-001.001--vertical800muJ.root: 50688
    400:
      muJ:
        /mnt/data_sgbc1/SPBDATA/SPB_Utah/021016/GLS/allpackets-SPBEUSO-ACQUISITION-20161002-082319-00

In [58]:
# Compacted copy of the directory / suffix-token tree, as returned by remove_single_item_dicts.
num_events_by_dirname_by_suffix_dict_compacted = remove_single_item_dicts(num_events_by_dirname_by_suffix_dict)

In [110]:
# Print the compacted tree with the data-directory prefix removed from the keys.
print_dicts_recursively(num_events_by_dirname_by_suffix_dict_compacted, original_utah_files_dir, max_depth=None)

011016/GLS:                                                                                                             2257280
    l1_laser:                                                                                                               105984
        pass_1:                                                                                                                 42496
            011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-103619-001.001--l1_laser_pass1.root                                  None
            011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-103644-001.001--l1_laser_pass1.root                                  42496
        energy_sweep:                                                                                                           46080
            011016/GLS/allpackets-SPBEUSO-ACQUISITION-20161001-104412-001.001--l1_laser_energy_sweep.root                           46080
        perp_sweep:                                         