**<h1>DataFrame for FENICS 1</h1>**
<p>Script that gathers the FENICS test data into a single DataFrame for later analysis.</p>

**Dependencies / Modules**

In [1]:
import pandas as pd
import os
import re

**DataFrame initialization**

In [2]:
# Descriptive fields of a test run; all of them live under the top-level
# 'Board' column group. The 'FastResult' / 'SlowResult' groups are not declared
# here: their columns appear when parsed records carrying those keys are added.
tuples = [
    ('Board', field)
    for field in ('name', 'version', 'id', 'code', 'burnings',
                  'date', 'time', 'specialTests', 'burns')
]

# Empty frame whose columns form a two-level (group, field) index.
mux = pd.MultiIndex.from_tuples(tuples)
data = pd.DataFrame(columns=mux)


**Parsing the folders :** Parses the information encoded in the folder names into a dictionary, using regular expressions.

In [3]:
# Regular expressions (regex) covering the naming conventions used in the folders.
# Board folder: capital-letter name, one version digit, id digits, letter code
# and an optional trailing number (stored as 'burnings').
subfolder_regex= r'([A-Z]+)(\d)(\d+)([A-Za-z]+)(\d*)'
# Run folder: 4-digit year followed by optional two-digit month, day, hours and
# minutes, then up to two alphanumeric labels; '-' or '_' may separate the parts.
subsubfolder_regex = r'(\d{4})[-_]?(\d{2})?[-_]?(\d{2})?[-_]?(\d{2})?[-_]?(\d{2})?[-_]?([A-Za-z]+[\dA-Za-z]+)?[-_]?([A-Za-z]+[\dA-Za-z]+)?[-_]?'

def data_parser(path, data_dir=None):
    """Build one record (a dict) from a run folder path and its JSON results.

    Parameters
    ----------
    path : str
        Relative path of the form ``'<board folder>/<run folder>/'``.
    data_dir : str, optional
        Directory containing the board folders. When omitted, the global
        ``mainDataDir`` is used, which keeps existing ``data_parser(path)``
        calls working.

    Returns
    -------
    dict
        Keys are ``(group, field)`` tuples: 'Board' fields parsed from the
        folder names, one 'FastResult' entry per item of ``FastResult.json``
        and one 'SlowResult' entry (``'Gain <row label>'``) per row of
        ``SlowResult.json``.

    Raises
    ------
    IndexError
        If ``path`` contains no ``'/'``.
    """
    if data_dir is None:
        # Resolved at call time, so the global only has to exist when the
        # function is actually called without an explicit directory.
        data_dir = mainDataDir

    parsed_path = path.split('/')
    temp_dict = {}

    # Board folder information
    board = re.match(subfolder_regex, parsed_path[0])
    if board:
        temp_dict = {
            ('Board','name') : board.group(1),
            ('Board','version') : board.group(2),
            ('Board','id') : board.group(3),
            ('Board','code') : board.group(4),
            # No trailing number on the folder name is recorded as '1'.
            ('Board','burnings') : board.group(5) if board.group(5) else '1',
        }

    # Run folder information
    run = re.match(subsubfolder_regex, parsed_path[1])
    if run:
        year, month, day, hours, minutes, first_test, second_test = run.groups()

        # year-month-day; components missing from the folder name are left out
        # instead of being rendered as the text 'None'.
        temp_dict[('Board','date')] = '-'.join(
            part for part in (year, month, day) if part is not None
        )

        # Hours:Minutes only when both are present. A lone two-digit group
        # (for instance a channel number right after the date) is not a time.
        if hours is not None and minutes is not None:
            temp_dict[('Board','time')] = f'{hours}:{minutes}'
        else:
            temp_dict[('Board','time')] = None

        # A special test exists as soon as one label matched. Comparing the two
        # labels for inequality would wrongly drop two identical labels.
        # The '<label>-None' rendering of a single label is kept on purpose so
        # the values stay comparable with data that was already exported.
        if first_test is not None or second_test is not None:
            temp_dict[('Board','specialTests')] = f'{first_test}-{second_test}'
        else:
            temp_dict[('Board','specialTests')] = None

    # Channel: trailing digits of the run folder name
    channel = re.search(r"(\d+)$", parsed_path[1])
    if channel:
        temp_dict[('Board','channel')] = channel.group(1)

    # Loading the JSON results that sit inside the run folder
    run_dir = os.path.join(data_dir, path)

    temp_fastResult = pd.read_json(os.path.join(run_dir, 'FastResult.json'), typ='series')
    for index, value in temp_fastResult.items():
        temp_dict[('FastResult', index)] = value

    temp_slowResult = pd.read_json(os.path.join(run_dir, 'SlowResult.json'))
    for gain, row in temp_slowResult.iterrows():
        # The whole row (a Series) is stored in a single cell.
        temp_dict[('SlowResult', 'Gain ' + str(gain))] = row

    return temp_dict

**Folder scanning function :** Function that scans the folders looking for needed json files

In [8]:
def subfolder_explorer(filtredFolder, data) :
    """Collect the FENICS records found under the given board folders.

    Parameters
    ----------
    filtredFolder : list of str
        Names of folders located directly under the global ``mainDataDir``.
    data : pandas.DataFrame
        Frame to extend; it is not modified in place.

    Returns
    -------
    pandas.DataFrame
        ``data`` followed by one row per matching file, i.e. per file whose
        name ends with "SlowResult.json" inside a sub-sub-folder whose name
        starts with "20". ``data`` itself is returned when nothing matches.
    """
    records = []
    for subfolder in filtredFolder :
        subfolder_dir = os.path.join(mainDataDir, subfolder)
        if not os.path.isdir(subfolder_dir) :
            continue  # stray file next to the board folders
        for subsubfolder in os.listdir(subfolder_dir) :
            # Only run folders are of interest; filtering first avoids listing
            # (and possibly failing on) unrelated entries.
            if not subsubfolder.startswith("20") :
                continue
            run_dir = os.path.join(subfolder_dir, subsubfolder)
            if not os.path.isdir(run_dir) :
                continue
            for fileName in os.listdir(run_dir) :
                if fileName.endswith("SlowResult.json") :
                    path = subfolder + "/" + subsubfolder + "/"
                    records.append(data_parser(path))

    if not records :
        return data

    # DataFrame.append was removed in pandas 2.0 and copied the whole frame on
    # every call; build all new rows at once and concatenate a single time.
    new_rows = pd.DataFrame(records)
    if len(new_rows.columns) :
        # Record keys are (group, field) tuples: make sure they form the same
        # two-level column index as `data`, not a flat index of tuples.
        new_rows.columns = pd.MultiIndex.from_tuples(list(new_rows.columns))
    return pd.concat([data, new_rows], ignore_index=True)

**Script:** Setting up the main directory to look for data.

In [5]:
# Root directory that holds the board folders (trailing '/' is required by the
# string concatenations that use it).
mainDataDir = "/AtlasDisk/user/FENICS/"
fenixFolder = os.listdir(mainDataDir)
# Boards to load into the DataFrame.
FENICS_to_analyse = ['FENICS115','FENICS114','FENICS113','FENICS112','FENICS111','FENICS110','FENICS109','FENICS108','FENICS107','FENICS106','FENICS105','FENICS104','FENICS101']

# Restart from zero rows (columns are kept) so that running this cell a second
# time does not add every record again.
data = data.iloc[0:0]
for FENICS in FENICS_to_analyse :
    # Announce the board before the (slow) parsing starts, not after it.
    print(f"parsing {FENICS} data...", end='\r')
    # startswith() is also true for an exact match, no separate equality test needed.
    filtredFolder = [string for string in fenixFolder if string.startswith(FENICS)]
    data = subfolder_explorer(filtredFolder , data)
print("Parsing done with success.", end='\r')

Parsing done with success.

**Visualization :** Visualizing the data

In [6]:
# Display the assembled DataFrame (pandas abbreviates the rendering of large frames).
data

Unnamed: 0_level_0,Board,Board,Board,Board,Board,Board,Board,Board,Board,Board,...,FastResult,FastResult,FastResult,FastResult,SlowResult,SlowResult,SlowResult,SlowResult,SlowResult,SlowResult
Unnamed: 0_level_1,name,version,id,code,burnings,date,time,specialTests,burns,channel,...,NoiseLG,NoiseLGIG1,NoiseLGIG2,NoiseLGIG6,Gain 0,Gain 1,Gain 2,Gain 3,Gain 4,Gain 5
0,FENICS,1,15,FDrb,1,2021-03-25,14:02,,,0,...,"{'Mu': '97.73063897963084', 'Sigma': '1.078502...","{'Mu': '97.71968858563208', 'Sigma': '1.062337...","{'Mu': '97.72812743018778', 'Sigma': '1.071447...","{'Mu': '97.75260598880364', 'Sigma': '1.067633...",Linearity {'SlopeLin': '4.800434118898377...,Linearity {'SlopeLin': '400.4043704835753...,Linearity {'SlopeLin': '405.1929872194111...,Linearity {'SlopeLin': '805.6581838683687...,Linearity {'SlopeLin': '2001.734544459879...,Linearity {'SlopeLin': '2401.804587392683...
1,FENICS,1,15,FDrb,1,2021-05-12,,FrameCap-None,,0,...,"{'Mu': '97.9421693343533', 'Sigma': '1.0775347...","{'Mu': '97.91786733146442', 'Sigma': '1.064455...","{'Mu': '97.92197033389753', 'Sigma': '1.062775...","{'Mu': '97.94258144621692', 'Sigma': '1.064248...",Linearity {'SlopeLin': '4.799781453381647...,Linearity {'SlopeLin': '400.3024022895137...,Linearity {'SlopeLin': '405.1085397607352...,Linearity {'SlopeLin': '805.4705270205782...,Linearity {'SlopeLin': '2001.340795050867...,Linearity {'SlopeLin': '2401.295795597048...
2,FENICS,1,15,FDrb,2,2021-05-20,,FrameCap-None,,0,...,"{'Mu': '97.88502629427867', 'Sigma': '1.069074...","{'Mu': '97.85770768256093', 'Sigma': '1.062647...","{'Mu': '97.86652834842384', 'Sigma': '1.069020...","{'Mu': '97.90671046336503', 'Sigma': '1.075777...",Linearity {'SlopeLin': '4.799550781432409...,Linearity {'SlopeLin': '399.9971280358112...,Linearity {'SlopeLin': '404.7915995943949...,Linearity {'SlopeLin': '804.8468141688262...,Linearity {'SlopeLin': '1999.531064377251...,Linearity {'SlopeLin': '2399.213927405123...
3,FENICS,1,15,FDrb,3,2021-06-02,,FrameCap-None,,0,...,"{'Mu': '97.85509625912006', 'Sigma': '1.064791...","{'Mu': '97.76945721135665', 'Sigma': '1.077966...","{'Mu': '97.74491392485916', 'Sigma': '1.079422...","{'Mu': '97.82791291282125', 'Sigma': '1.072149...",Linearity {'SlopeLin': '4.799581416039028...,Linearity {'SlopeLin': '399.9584686127214...,Linearity {'SlopeLin': '404.7546769985423...,Linearity {'SlopeLin': '804.724032888586'...,Linearity {'SlopeLin': '1999.278749469888...,"Linearity {'SlopeLin': '2398.9249993013',..."
4,FENICS,1,15,FDrb,4,2021-06-18,,FrameCap-None,,0,...,"{'Mu': '97.73756537651515', 'Sigma': '1.062712...","{'Mu': '97.73424852265454', 'Sigma': '1.064383...","{'Mu': '97.68421765923547', 'Sigma': '1.068840...","{'Mu': '97.64868184150247', 'Sigma': '1.080261...",Linearity {'SlopeLin': '4.800045945476822...,Linearity {'SlopeLin': '400.0654628970622...,Linearity {'SlopeLin': '404.8618182269374...,Linearity {'SlopeLin': '805.0153808529218...,Linearity {'SlopeLin': '1999.938983976113...,Linearity {'SlopeLin': '2399.772356947660...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
589,FENICS,1,01,FDb,5,2021-09-14,,MB00-None,,6,...,"{'Mu': '100.90917777092793', 'Sigma': '1.09368...","{'Mu': '100.88304230733037', 'Sigma': '1.10039...","{'Mu': '100.82791043383493', 'Sigma': '1.08199...","{'Mu': '100.87442641832294', 'Sigma': '1.09332...",Linearity {'SlopeLin': '4.801147423678706...,Linearity {'SlopeLin': '404.1994929655893...,Linearity {'SlopeLin': '408.9989555982772...,Linearity {'SlopeLin': '812.4868984095308...,Linearity {'SlopeLin': '2018.505238252882...,Linearity {'SlopeLin': '2421.388015141473...
590,FENICS,1,01,FDb,5,2021-09-15,,MB00-ExtAll,,6,...,"{'Mu': '100.7611736126435', 'Sigma': '1.087949...","{'Mu': '100.75889122404777', 'Sigma': '1.08842...","{'Mu': '100.76253110759612', 'Sigma': '1.07845...","{'Mu': '100.73671604462211', 'Sigma': '1.09172...",Linearity {'SlopeLin': '4.802587047851404...,Linearity {'SlopeLin': '404.2835517293621...,Linearity {'SlopeLin': '409.0859785983579...,Linearity {'SlopeLin': '812.6235065958737...,Linearity {'SlopeLin': '2019.016410415903...,Linearity {'SlopeLin': '2422.045728198938...
591,FENICS,1,01,FDb,5,2021-10-05,08:50,,,10,...,"{'Mu': '116.53436194708264', 'Sigma': '1.04509...","{'Mu': '116.48817270180528', 'Sigma': '1.05336...","{'Mu': '116.47058271952874', 'Sigma': '1.05245...","{'Mu': '116.52292447153177', 'Sigma': '1.05297...",Linearity {'SlopeLin': '4.806501740017488...,Linearity {'SlopeLin': '404.6429439476466...,Linearity {'SlopeLin': '409.4515289323368...,Linearity {'SlopeLin': '813.3578870978215...,Linearity {'SlopeLin': '2021.050204973065...,Linearity {'SlopeLin': '2424.578330753663...
592,FENICS,1,01,FDb,5,2021-10-05,10:30,,,10,...,"{'Mu': '116.69378382844607', 'Sigma': '1.04134...","{'Mu': '116.78618570700154', 'Sigma': '1.03548...","{'Mu': '116.78498416850381', 'Sigma': '1.05911...","{'Mu': '116.76657611763265', 'Sigma': '1.06308...",Linearity {'SlopeLin': '4.805977909076233...,Linearity {'SlopeLin': '404.6188032062316...,Linearity {'SlopeLin': '409.4244204000505...,Linearity {'SlopeLin': '813.3147816967069...,Linearity {'SlopeLin': '2020.89208889589'...,Linearity {'SlopeLin': '2424.232488892763...


**Saving the file :** saving the file as a JSON

In [7]:
# Write the whole DataFrame to a JSON file at a hard-coded absolute path.
data.to_json('/AtlasDisk/home2/sisaid/data/FENICS_data.json')

In [10]:
# NOTE(review): this call fails with the ParserError shown in the cell output
# ("Expected 1 fields in line 3, saw 14"). Presumably the file starts with
# preamble lines or is not laid out as a plain comma-separated table —
# TODO inspect burns.csv and pass the matching read_csv options (e.g. skiprows / sep).
burnstiming = pd.read_csv('/AtlasDisk/home2/sisaid/data/burns.csv')

ParserError: Error tokenizing data. C error: Expected 1 fields in line 3, saw 14


In [3]:
# NOTE(review): `A` is not referenced by the data-processing cells of this
# notebook; looks like a leftover scratch cell — TODO confirm and remove.
A = 2

In [4]:
# Echo the current value of `A`.
A

2