In [1]:
import pandas
import h5py

In [2]:
class espion_file:
    """Loader for erg ESPION CSV files into Python"""
    def __init__(self, filepath, filename, species):
        self.basedir = "/Users/angueyraaristjm/Documents/LiData/invivoERG/"
        self.filepath = filepath
        self.filename = filename
        self.savepath = self.basedir + self.filepath + "/"
        self.fullpath = self.savepath + self.filename + ".csv"
        self.species = species
        #self.metadata = self.pull_metadata()
        #self.datatable = self.pull_datatable()
        #self.data = self.pull_data()
        #self.HDF5remap()

In [3]:
erg = espion_file("20160819/20160819_Sq813", "20160819_Sq813_02_Steps2sG_pre", "Squirrel")

In [4]:
def pull_metadata(erg):
        # pull and parse metadata information
        csvparams = pandas.read_csv(erg.fullpath, header=1, usecols=[0, 1], nrows=10)
        csvparams = csvparams.dropna()
        metadata = dict()
        intfields = ["Steps", "Channels"]
        datefields = ["DOB", "Date performed"]
        for i in range(1, 10):
            if csvparams.Parameter[i] in intfields:
                metadata[csvparams.Parameter[i]] = int(csvparams.Value[i])
            elif csvparams.Parameter[i] in datefields:
                metadata[csvparams.Parameter[i]] = pandas.to_datetime(csvparams.Value[i])
            elif csvparams.Parameter[i] == "Family Name":
                metadata["ID"] = csvparams.Value[i]
            else: 
                metadata[csvparams.Parameter[i]] = csvparams.Value[i]
        metadata['Species'] = erg.species
        return metadata

In [5]:
erg.metadata=pull_metadata(erg)

In [6]:
erg.metadata

{'Channels': 2,
 'DOB': Timestamp('2016-05-20 00:00:00'),
 'Date performed': Timestamp('2016-08-19 10:14:07'),
 'Gender': 'Male',
 'ID': 'LI813',
 'Protocol': 'Iseries_steps2sG[12.1]',
 'Species': 'Squirrel',
 'Steps': 10,
 'Stimulator': 'ColorDome Ganzfeld',
 'Test method': 'ERG Test'}

In [7]:
def pull_datatable(erg):
        # pull datatable to parse data
        fullcsv = pandas.read_csv(erg.fullpath, header=0)
        if "Data Table" in fullcsv:
            print("Data Table is Right")
            datatable = pandas.read_csv(erg.fullpath, header=1, usecols=[3, 4, 5, 8])
            datatable = datatable.dropna()
            datatable = datatable.astype(int)
        elif fullcsv.ix[12, 0] == "Data Table":
            print("Data Table is Below 12")
            datatable = pandas.read_csv(erg.fullpath, header=1, usecols=[0, 1, 2, 5], skiprows=13)
            datatable = datatable.dropna()
            datatable = datatable.astype(int)
        elif fullcsv.ix[13, 0] == "Data Table":
            print("Data Table is Below 13")
            datatable = pandas.read_csv(erg.fullpath, header=1, usecols=[0, 1, 2, 5], skiprows=14)
            datatable = datatable.dropna()
            datatable = datatable.astype(int)
        else:
            print("Did not find datatable")
        return datatable

In [13]:
erg.datatable=pull_datatable(erg)

Data Table is Below 12


  if self.run_code(code, result):


In [14]:
fullcsv = pandas.read_csv(erg.fullpath, header=0, low_memory=False)

In [15]:
fullcsv

Unnamed: 0,Header Table,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,...,Unnamed: 172,Unnamed: 173,Unnamed: 174,Unnamed: 175,Unnamed: 176,Unnamed: 177,Unnamed: 178,Unnamed: 179,Unnamed: 180,Unnamed: 181
0,Parameter,Value,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,Protocol,Iseries_dark_XeMax[9/29/2010],,,,,,,,,...,,,,,,,,,,
3,Test method,ERG Test,,,,,,,,,...,,,,,,,,,,
4,Stimulator,ColorDome Ganzfeld,,,,,,,,,...,,,,,,,,,,
5,Date performed,08-19-2016 9:57:24 AM,,,,,,,,,...,,,,,,,,,,
6,Steps,13,,,,,,,,,...,,,,,,,,,,
7,Channels,2,,,,,,,,,...,,,,,,,,,,
8,Family Name,LI813,,,,,,,,,...,,,,,,,,,,
9,DOB,05-20-2016,,,,,,,,,...,,,,,,,,,,


In [12]:
erg.datatable

Unnamed: 0,Step,Column,Chan,Trials
1,1,7,1,1
2,1,8,2,1
3,2,10,1,5
4,2,10,1,5
5,2,20,2,5
6,2,20,2,5
7,3,31,1,5
8,3,36,2,5
9,4,42,1,5
10,4,47,2,5


In [21]:
erg.metadata

{'Channels': 2,
 'DOB': Timestamp('2016-05-20 00:00:00'),
 'Date performed': Timestamp('2016-08-18 14:40:00'),
 'Gender': 'Male',
 'ID': 'LI852',
 'Protocol': 'AdaptTau3[9/29/2010]',
 'Species': 'Squirrel',
 'Steps': 2,
 'Stimulator': 'ColorDome Ganzfeld',
 'Test method': 'ERG Test'}

In [52]:
def pull_data(erg):
        # parse data based on data table
        fullcsv = pandas.read_csv(erg.fullpath, header=0)
        data = dict()
        for step in range(erg.metadata['Steps']):
            stepname = "Step" + str(step+1).zfill(2)
            print(stepname)
            ch1start = erg.datatable.Column[(erg.datatable.Step==(int(step+1))) & (erg.datatable.Chan==1)]
            ch2start = erg.datatable.Column[(erg.datatable.Step==(int(step+1))) & (erg.datatable.Chan==2)]
            ntrials = erg.datatable.Trials[(erg.datatable.Step==(int(step+1))) & (erg.datatable.Chan==1)]
            if len(ch1start)==1:
                #normally each step runs only once but if it's repeated, ESPION doubles the entries
                ch1start = int(ch1start)
                ch2start = int(ch2start-1)
                ntrials = int(ntrials)
                data[stepname] = espion_step(ch1start=ch1start, ch2start=ch2start, ntrials=ntrials, csvtable=fullcsv)
            elif len(ch1start.unique())==1:
                #found duplicates but all have the same column start
                ch1start = int(ch1start.unique())
                ch2start = int(ch2start.unique()-1)
                ntrials = int(ntrials.sum())
                data[stepname] = espion_step(ch1start=ch1start, ch2start=ch2start, ntrials=ntrials, csvtable=fullcsv)
        return data

def espion_step(ch1start, ch2start, ntrials, csvtable):
    print(ch1start)
    print(ch2start)
    """Loader for a single erg ESPION step"""
    colstart = ch1start-1
    colend = colstart+1+(ntrials*2)
    currcsv = csvtable.ix[0:, colstart:colend].copy(deep=0)
    currcsv = currcsv.dropna().reset_index(drop=True)
    currcsv = currcsv.drop(0).reset_index(drop=True)
    colnames = []
    ch1cnt = 0
    ch2cnt = 0
    for i in range(0, len(currcsv.columns)):
        currcsv.ix[0:, i] = pandas.to_numeric(currcsv.ix[0:, i])
        if i == 0:
            colnames.append('t')
        elif 1 <= i < 1+ntrials:
            ch1cnt += 1
            colnames.append('L' + str(ch1cnt).zfill(2))
        elif 1+ntrials <= i < 1+(ntrials*2):
            ch2cnt += 1
            colnames.append('R' + str(ch2cnt).zfill(2))
    currcsv.columns = colnames
    currcsv = currcsv.divide(1000)
    csvoutput = currcsv.copy()
    return csvoutput

In [53]:
pull_data(erg)

Step01
7
36
Step02


{'Step01':          t      L01     L02     L03     L04     L05     L06     L07     L08  \
 0   -0.050   31.235  -0.675 -10.066   6.887  -7.121   7.448  -6.117  -4.571   
 1   -0.049   29.754   1.071 -10.922   7.460  -7.689   7.637  -5.329  -2.783   
 2   -0.048   28.583   1.928 -11.159   7.514  -6.473   9.572  -4.910  -3.265   
 3   -0.047   26.032   1.244 -12.339   5.742  -6.017  11.747  -8.047  -5.145   
 4   -0.046   23.465   1.323 -14.234   5.847  -5.977  12.676  -9.125  -6.259   
 5   -0.045   24.842   0.707 -14.177   7.232  -6.593  13.060  -6.983  -7.877   
 6   -0.044   26.511  -1.297 -13.834   7.172  -6.951  13.684  -7.828  -7.950   
 7   -0.043   26.328  -2.488 -13.653   5.718  -7.131  12.047  -9.149  -7.125   
 8   -0.042   26.978  -2.656 -12.627   3.059  -5.688  11.363  -8.815  -6.925   
 9   -0.041   28.325  -0.804 -12.206   0.221  -3.182  13.327  -9.046  -7.274   
 10  -0.040   29.925   0.562 -11.919  -1.331  -3.438  12.179  -9.683  -6.885   
 11  -0.039   33.646  -1.078  

In [46]:
fullcsv = pandas.read_csv(erg.fullpath, header=0)
data = dict()
for step in range(erg.metadata['Steps']):
    stepname = "Step" + str(step+1).zfill(2)
    print(stepname)
    ch1start = erg.datatable.Column[(erg.datatable.Step==(int(step+1))) & (erg.datatable.Chan==1)]
    ch2start = erg.datatable.Column[(erg.datatable.Step==(int(step+1))) & (erg.datatable.Chan==2)]
    ntrials = erg.datatable.Trials[(erg.datatable.Step==(int(step+1))) & (erg.datatable.Chan==1)]
    if len(ch1start)==1:
        #normally each step runs only once but if it's repeated, ESPION doubles the entries
        ch1start = int(ch1start)
        ch2start = int(ch2start-1)
        ntrials = int(ntrials)
    elif len(ch1start.unique())==1:
        #found duplicates but all have the same column start
        ch1start = int(ch1start.unique())
        ch2start = int(ch2start.unique()-1)
        ntrials = int(ntrials.sum())

Step01
Step02


In [50]:
len(ch1start.unique())

0

In [54]:
class espion_file:
    """Loader for erg ESPION CSV files into Python"""
    def __init__(self, filepath, filename, species):
        self.basedir = "/Users/angueyraaristjm/Documents/LiData/invivoERG/"
        self.filepath = filepath
        self.filename = filename
        self.savepath = self.basedir + self.filepath + "/"
        self.fullpath = self.savepath + self.filename + ".csv"
        self.species = species
        self.metadata = self.pull_metadata()
        self.datatable = self.pull_datatable()
        self.data = self.pull_data()
        self.HDF5remap()
    
    def pull_metadata(self):
        # pull and parse metadata information
        csvparams = pandas.read_csv(self.fullpath, header=1, usecols=[0, 1], nrows=10)
        csvparams = csvparams.dropna()
        metadata = dict()
        intfields = ["Steps", "Channels"]
        datefields = ["DOB", "Date performed"]
        for i in range(1, 10):
            if csvparams.Parameter[i] in intfields:
                metadata[csvparams.Parameter[i]] = int(csvparams.Value[i])
            elif csvparams.Parameter[i] in datefields:
                metadata[csvparams.Parameter[i]] = pandas.to_datetime(csvparams.Value[i])
            elif csvparams.Parameter[i] == "Family Name":
                metadata["ID"] = csvparams.Value[i]
            else: 
                metadata[csvparams.Parameter[i]] = csvparams.Value[i]
        metadata['Species'] = self.species
        return metadata
                
    def pull_datatable(self):
        # pull datatable to parse data
        fullcsv = pandas.read_csv(self.fullpath, header=0)
        if "Data Table" in fullcsv:
            # print("Data Table is Right")
            datatable = pandas.read_csv(self.fullpath, header=1, usecols=[3, 4, 5, 8])
            datatable = datatable.dropna()
            datatable = datatable.astype(int)
        if fullcsv.ix[12, 0] == "Data Table":
            # print("Data Table is Below")
            datatable = pandas.read_csv(self.fullpath, header=1, usecols=[0, 1, 2, 5], skiprows=13)
            datatable = datatable.dropna()
            datatable = datatable.astype(int)
        return datatable
    
    def pull_data(self):
        # parse data based on data table
        fullcsv = pandas.read_csv(self.fullpath, header=0)
        data = dict()
        for step in range(self.metadata['Steps']):
            stepname = "Step" + str(step+1).zfill(2)
            # print(stepname)
            ch1start = self.datatable.Column[(self.datatable.Step==(int(step+1))) & (self.datatable.Chan==1)]
            ch2start = self.datatable.Column[(self.datatable.Step==(int(step+1))) & (self.datatable.Chan==2)]
            ntrials = self.datatable.Trials[(self.datatable.Step==(int(step+1))) & (self.datatable.Chan==1)]
            if len(ch1start)==1:
                #normally each step runs only once but if it's repeated, ESPION doubles the entries
                ch1start = int(ch1start)
                ch2start = int(ch2start-1)
                ntrials = int(ntrials)
                data[stepname] = espion_step(ch1start=ch1start, ch2start=ch2start, ntrials=ntrials, csvtable=fullcsv)
            elif len(ch1start.unique())==1:
                #found duplicates but all have the same column start
                ch1start = int(ch1start.unique())
                ch2start = int(ch2start.unique()-1)
                ntrials = int(ntrials.sum())
                data[stepname] = self.espion_step(ch1start=ch1start, ch2start=ch2start, ntrials=ntrials, csvtable=fullcsv)
        return data
    
    @staticmethod
    def espion_step(ch1start, ch2start, ntrials, csvtable):
        """Loader for a single erg ESPION step"""
        colstart = ch1start-1
        colend = colstart+1+(ntrials*2)
        currcsv = csvtable.ix[0:, colstart:colend].copy(deep=0)
        currcsv = currcsv.dropna().reset_index(drop=True)
        currcsv = currcsv.drop(0).reset_index(drop=True)
        colnames = []
        ch1cnt = 0
        ch2cnt = 0
        for i in range(0, len(currcsv.columns)):
            currcsv.ix[0:, i] = pandas.to_numeric(currcsv.ix[0:, i])
            if i == 0:
                colnames.append('t')
            elif 1 <= i < 1+ntrials:
                ch1cnt += 1
                colnames.append('L' + str(ch1cnt).zfill(2))
            elif 1+ntrials <= i < 1+(ntrials*2):
                ch2cnt += 1
                colnames.append('R' + str(ch2cnt).zfill(2))
        currcsv.columns = colnames
        currcsv = currcsv.divide(1000)
        csvoutput = currcsv.copy()
        return csvoutput

    def HDF5remap(self):
        dt = h5py.special_dtype(vlen=bytes)
        intfields = ["Steps", "Channels"]
        
        h5name = self.savepath + self.filename + ".h5"
        print('\nSaving h5 file...')
        with h5py.File(h5name, 'w') as hfile:
#             print('\tFrom datatable:')
            for col in self.datatable.columns:
                hfile.create_dataset(col.replace(' ','_'), data=self.datatable.get(col))
#                 print('\t\t'+ col)
#             print('\tFrom metadata:')
            for key in self.metadata:
                if key in intfields:
                    hfile.attrs.create(key.replace(' ','_'), data=self.metadata[key])
                else:
                    hfile.attrs.create(key.replace(' ','_'), data=str(self.metadata[key]), dtype=dt)
#                 print('\t\t' + key)
            # print('\tFrom data:')
            for step in self.data:
                group = hfile.create_group(step)
                group.create_dataset('t', data=self.data[step].filter(regex = 't'))
                group.create_dataset('L', data=self.data[step].filter(regex = 'L'))
                group.create_dataset('R', data=self.data[step].filter(regex = 'R'))
                # print('\t\t' + step)
        print('Saved to: ' + h5name)


In [55]:
a = espion_file("20160818/20160818_Sq852", "20160818_Sq852_04_Flashes_pre", "Squirrel")


Saving h5 file...
Saved to: /Users/angueyraaristjm/Documents/LiData/invivoERG/20160818/20160818_Sq852/20160818_Sq852_04_Flashes_pre.h5
