__I - Installation of libraries and listing directories of data.__

In [95]:
pip install --upgrade pyabf

Requirement already up-to-date: pyabf in /Users/adrianalejandro/opt/anaconda3/lib/python3.7/site-packages (2.2.3)
Note: you may need to restart the kernel to use updated packages.


In [97]:
import pyabf
import pyabf.tools.memtest 
import matplotlib.pyplot as plt
import numpy as np
import os 
import os.path
from os import path
import csv
import pandas as pnd
from tkinter import Tk
from tkinter.filedialog import askdirectory

import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

__II - Functions for obtaining csv metadata.__

In [211]:
def get_txt_metadata(contents):
    """Write the pyabf header text of every recording in `contents` to disk.

    Every entry whose name contains neither '.txt' nor '.csv' is treated as
    an ABF recording. Its `headerText` is written to
    '<filename>_header.txt' next to the recording.

    Parameters
    ----------
    contents : iterable of str
        File names (or paths) to scan.

    Returns
    -------
    list of str
        Paths of the header text files that were written, in input order.

    Raises
    ------
    Whatever `pyabf.ABF` raises for a file it cannot load. In that case no
    header file is created for the offending recording.
    """
    all_header_files = []

    for filename in contents:
        # Skip metadata files generated by a previous run (and any other txt/csv).
        if '.txt' in filename or '.csv' in filename:
            continue

        # Load the recording BEFORE creating the output file, so a failed load
        # does not leave an empty '<name>_header.txt' behind.
        abf = pyabf.ABF(filename)

        header_txt = filename + '_header.txt'
        # The context manager guarantees the handle is closed even if the write fails.
        with open(header_txt, 'w') as header:
            header.write(abf.headerText)
        all_header_files.append(header_txt)

    return all_header_files


In [213]:
def get_csv_metadata(all_header_files):
    """Convert header text files into two-column ('Data', 'Values') CSV files.

    Only lines containing '=' are kept. Each line is split on the FIRST
    ' = ' into a key ('Data') and a value ('Values'); surrounding whitespace
    and the trailing newline are stripped from both. A line that contains '='
    but no ' = ' separator is stored whole in 'Data' with an empty value.

    Parameters
    ----------
    all_header_files : iterable of str
        Candidate paths. Entries are processed only if their name contains
        '.txt' and does not contain '.csv'.

    Returns
    -------
    list of str
        Paths of the CSV files written (input name with '.txt' replaced by
        '.csv'), in input order.
    """
    csv_files = []

    for filename in all_header_files:
        if '.csv' in filename or '.txt' not in filename:
            continue

        rows = []
        # Context manager: the original never closed the header file.
        with open(filename, 'r') as header:
            for line in header:
                if '=' not in line:
                    continue
                # partition splits on the first separator only, so values that
                # themselves contain ' = ' stay intact instead of spilling into
                # extra columns.
                key, separator, value = line.partition(' = ')
                rows.append((key.strip(), value.strip() if separator else None))

        csv_filename = filename.replace('.txt', '.csv')
        # Explicit column names also make an empty header produce a valid
        # (header-only) CSV instead of failing on column assignment.
        metadata_csv = pnd.DataFrame(rows, columns=['Data', 'Values'])
        metadata_csv.to_csv(csv_filename, sep=',', index=False)
        csv_files.append(csv_filename)

    return csv_files


__III - Functions for calculating resistance and peak amplitude values.__

In [122]:
def get_metadata_values(csv,parameter):
    """Look up one entry of a metadata CSV.

    The CSV is expected to have a 'Data' column (keys) and a 'Values' column.
    Returns the raw 'Values' entry stored for `parameter`; raises KeyError if
    the parameter is not present.
    """
    table = pnd.read_csv(csv)
    # Key -> value mapping built from the two columns.
    lookup = table.set_index('Data')['Values'].to_dict()
    return lookup[parameter]

def get_mV_step_interval(raw_str):
    """Return the (start, end) sample indices of the non-zero command step.

    `raw_str` is a comma-separated epoch description in which each entry ends
    with a level followed by a bracketed index range, e.g.
    'Step 0.00 [0:3125], Step -5.00 [3125:23125], Step 0.00 [23125:25000]'.

    Assumes a single voltage step per sweep (voltage-clamp protocol). If
    several non-zero epochs are present, the last one wins, as before.

    Raises
    ------
    ValueError
        If no epoch with a non-zero level and an index range is found.
    """
    step_bounds = None

    for epoch in raw_str.split(', '):
        open_br = epoch.find('[')
        close_br = epoch.find(']')
        if open_br == -1 or close_br == -1:
            continue  # no index range to extract from this entry

        # The level is the last token before the bracket. Compare it
        # numerically: a substring test for '0.00' wrongly treats levels such
        # as -10.00 or 100.00 as zero.
        tokens = epoch[:open_br].split()
        try:
            level = float(tokens[-1])
        except (IndexError, ValueError):
            continue

        if level != 0:
            step_bounds = epoch[open_br + 1:close_br]

    if step_bounds is None:
        raise ValueError(
            'No non-zero command step found in epoch description: %r' % (raw_str,))

    step_points = step_bounds.split(':')
    step_start, step_end = int(step_points[0]), int(step_points[1])
    return step_start, step_end


In [123]:
def get_resistances(step_start, step_end, abf):
    """Compute per-sweep input and series resistance from a command step.

    For every sweep in `abf.sweepList`:
      * series resistance = command value 10 samples after `step_start`
        divided by the minimum of `sweepY` inside the step, times 1000;
      * input resistance  = command value 10 samples before `step_end`
        divided by `sweepY` at that same sample, times 1000.

    The factor 1000 converts gigaohms to megaohms (presumably sweepC is in mV
    and sweepY in pA -- confirm against the recordings).

    Returns
    -------
    (Ri, Rs) : tuple of lists
        Input resistances and series resistances, one entry per sweep.
    """
    input_resistances = []
    series_resistances = []

    onset_sample = step_start + 10    # just after the step begins
    plateau_sample = step_end - 10    # just before the step ends

    for sweep in abf.sweepList:
        abf.setSweep(sweep)

        # Largest negative deflection inside the step.
        transient_peak = min(abf.sweepY[step_start:step_end])
        series_resistances.append((abf.sweepC[onset_sample] / transient_peak) * 1000)

        plateau_current = abf.sweepY[plateau_sample]
        input_resistances.append((abf.sweepC[plateau_sample] / plateau_current) * 1000)

    return input_resistances, series_resistances

In [124]:
def get_RiRs_persweep(csv_file,abf):
    """Return (Ri, Rs) per sweep for one recording.

    Reads the 'sweepEpochs' entry from the recording's metadata CSV, derives
    the command-step sample interval from it, and measures the resistances
    over that interval. Swap the metadata key to reuse this as a template for
    other parameters.
    """
    epoch_description = get_metadata_values(csv_file, 'sweepEpochs')
    first_sample, last_sample = get_mV_step_interval(epoch_description)
    Ri, Rs = get_resistances(first_sample, last_sample, abf)
    return Ri, Rs

In [107]:
def get_peak_pA(abf, csv_files, artifact_window=(0, 5000), pre_window_s=0.005, post_window_s=0.01):
    """Locate the EPSC peak (most negative current) after the stimulus artifact in every sweep.

    Per sweep:
      1. the stimulus artifact is the maximum of `sweepY` inside `artifact_window`;
      2. the response onset is the maximum of `sweepY` within `pre_window_s`
         seconds, starting 2 samples after the artifact;
      3. the peak is the minimum of `sweepY` within `post_window_s` seconds
         after that onset.

    Parameters
    ----------
    abf : object exposing `sweepList`, `setSweep(i)` and `sweepY`
        (a pyabf.ABF in this notebook).
    csv_files : str or sequence of str
        Metadata CSV path(s). Only consulted for the sampling rate when `abf`
        has no `dataRate` attribute; in that case the first entry is used.
    artifact_window : (int, int), optional
        Sample range searched for the stimulus artifact.
    pre_window_s, post_window_s : float, optional
        Search-window lengths in seconds (see above).

    Returns
    -------
    peak_pA, peak_pA_ind, peak_pA_s, latency_s : lists, one entry per sweep
        Peak amplitude, its sample index, its time in seconds, and its time
        relative to the stimulus artifact.
    """
    parameter = 'dataRate'  # tickrate

    # The sampling rate belongs to the recording, not to a sweep. The original
    # indexed `csv_files` with the sweep number, which read another file's
    # metadata (or raised IndexError) and re-parsed a CSV on every sweep.
    rate = getattr(abf, 'dataRate', None)
    if rate is None:
        metadata_csv = csv_files if isinstance(csv_files, str) else csv_files[0]
        rate = get_metadata_values(metadata_csv, parameter)
    tickrate = float(rate)

    # Loop-invariant window sizes, in samples.
    pre_window_ticks = tickrate * pre_window_s
    post_window_ticks = tickrate * post_window_s
    i1, i2 = artifact_window

    peak_pA = []
    peak_pA_ind = []
    peak_pA_s = []
    latency_s = []

    for i in abf.sweepList:
        abf.setSweep(i)
        trace = abf.sweepY

        # Huge positive artifact; argmax is relative to the slice, so add i1 back.
        pos_max_ind = i1 + int(np.argmax(trace[i1:i2]))

        # Window in which the real starting point of the EPSC is searched.
        pre_window_i1 = pos_max_ind + 2
        pre_window_i2 = int(pre_window_i1 + pre_window_ticks)

        # Window in which the EPSC peak is searched.
        post_window_i1 = pre_window_i1 + int(np.argmax(trace[pre_window_i1:pre_window_i2]))
        post_window_i2 = int(post_window_i1 + post_window_ticks)

        search = trace[post_window_i1:post_window_i2]
        offset = int(np.argmin(search))
        peak = search[offset]
        peak_ind = post_window_i1 + offset
        peak_s = peak_ind / tickrate
        # Peak time normalized to stimulus onset (the positive artifact).
        latency = peak_s - (pos_max_ind / tickrate)

        peak_pA.append(peak)
        peak_pA_ind.append(peak_ind)
        peak_pA_s.append(peak_s)
        latency_s.append(latency)

    return peak_pA, peak_pA_ind, peak_pA_s, latency_s

__IV - Functions for generating `DataFrame` with all measures above.__

In [159]:
def df_prep(mouse,contents):
    """Split a cell folder listing into recordings and their metadata CSVs.

    Recordings are the entries whose name contains neither '.txt' nor '.csv'.
    Metadata CSVs are the '.csv' entries whose name does not contain `mouse`
    (that one is the stimulus-current file). Both lists are sorted and their
    first element is discarded. Raises IndexError if either list is empty.
    """
    abf_files = sorted(
        name for name in contents
        if not ('.txt' in name or '.csv' in name)
    )
    csv_files = sorted(
        name for name in contents
        if '.csv' in name and mouse not in name
    )

    # Drop the first entry of each list.
    del abf_files[0]
    del csv_files[0]
    return abf_files, csv_files


def get_stim_currents(mouse,contents): #stimulus currents file must be in csv format in the same folder as its abf's
    """Read the stimulus intensities recorded for one mouse.

    The stimulus file is the entry of `contents` whose name contains `mouse`
    (if several match, the last one is used). Its 'Stim Intensity (uA)'
    column is returned as a list with the first and last entries removed.

    Raises
    ------
    FileNotFoundError
        If no entry of `contents` contains `mouse`.
    """
    matches = [file for file in contents if mouse in file]
    if not matches:
        # Previously this surfaced as an UnboundLocalError on `stim_int`.
        raise FileNotFoundError(
            'No stimulus-current file containing %r was found in the folder contents.' % (mouse,))

    # Only the last match was ever used, so parse just that one.
    segment_file = matches[-1]
    current_steps = pnd.read_csv(segment_file)
    stim_int = current_steps['Stim Intensity (uA)'].tolist()
    return stim_int[1:-1]  # some cleanup: drop the first and last rows

def data_summary_percell(abf_files,csv_files,stim_int):
    """Summarise every recording of one cell.

    Parameters
    ----------
    abf_files : list of str
        Recording paths, one per stimulus intensity.
    csv_files : list of str
        Metadata CSV for each recording, in the same order as `abf_files`.
    stim_int : list
        Stimulus intensity for each recording, in the same order.

    Returns
    -------
    cell_df : DataFrame
        One row per recording: stimulus intensity, mean input resistance,
        mean series resistance and mean peak amplitude.
    pivot_peak_times : DataFrame
        Peak frame / time / latency per sweep (rows) and per recording
        (columns). Empty if there are no recordings.

    Raises
    ------
    ValueError
        If the three inputs do not describe the same number of recordings.
    """
    if not (len(abf_files) == len(stim_int) and len(csv_files) >= len(abf_files)):
        raise ValueError(
            'Mismatched inputs: %d recordings, %d metadata files, %d stimulus intensities.'
            % (len(abf_files), len(csv_files), len(stim_int)))

    mean_Ri = []
    mean_Rs = []
    mean_peak_pA = []
    peak_frames = []  # collected and concatenated once, instead of DataFrame.append in a loop

    for i in range(len(abf_files)):
        abf = pyabf.ABF(abf_files[i])
        Ri, Rs = get_RiRs_persweep(csv_files[i],abf)
        # Average resistances for this recording.
        mean_Ri.append(np.mean(Ri))
        mean_Rs.append(np.mean(Rs))

        sweep_num = abf.sweepList
        # get_peak_pA may look up its metadata by sweep number, so give it this
        # recording's CSV at every sweep position rather than the list of all
        # recordings' CSVs.
        sweep_csvs = [csv_files[i]] * (max(sweep_num) + 1 if len(sweep_num) else 0)

        # Peak amplitude of the EPSC.
        peak_pA, peak_pA_ind, peak_pA_s, latency_s = get_peak_pA(abf,sweep_csvs)
        mean_peak_pA.append(np.mean(peak_pA))

        # Remove the '.abf' suffix. str.strip('.abf') strips any of the
        # characters '.', 'a', 'b', 'f' from both ends and can mangle names.
        name = abf_files[i]
        seg = name[:-len('.abf')] if name.endswith('.abf') else name
        segment = [seg for j in sweep_num]

        # NOTE: the 'sweep_number' column actually holds the recording name;
        # the sweep numbers are the index. The label is kept so existing
        # output files keep their layout.
        peak_times = pnd.DataFrame(list(zip(peak_pA_ind,peak_pA_s,latency_s,segment)), index=sweep_num, columns =
                          ["peak_pA_frame", "peak_pA_time_(s)","peak_latency_(s)","sweep_number"])
        peak_frames.append(peak_times)

    cell_df = pnd.DataFrame(list(zip(stim_int, mean_Ri, mean_Rs, mean_peak_pA)), index=abf_files, columns =
                          ["stimulus_intensity_uA", "Input_Resistance_MOhm", "Series_Resistance_MOhm",
                           'Peak_amplitude_pA'])

    if peak_frames:
        all_peak_times = pnd.concat(peak_frames, ignore_index=False)
        # With no explicit `index`, pivot uses the existing index (the sweep
        # numbers). Passing the Index object itself is rejected by current pandas.
        pivot_peak_times = all_peak_times.pivot(columns='sweep_number') # Tidy the data by sweep number :)
    else:
        pivot_peak_times = pnd.DataFrame()
    return cell_df , pivot_peak_times
    

In [215]:
# Master function that generates dataframes containing Ri, Rs, peak_pA and respective timepoints, for each cell

def summarize_cell_VClamp(mouse,cell_dir):
    """Generate the metadata files for one cell folder and summarise its recordings.

    Side effects: changes the working directory to `cell_dir`, deletes a
    '.DS_Store' file there if present, and writes '<recording>_header.txt'
    and '<recording>_header.csv' files into the folder.

    Parameters
    ----------
    mouse : str
        Substring identifying the stimulus-current CSV inside the folder.
    cell_dir : str
        Folder containing the recordings of one cell.

    Returns
    -------
    summary, all_peak_times : DataFrames produced by `data_summary_percell`.
    """
    os.chdir(cell_dir) # change directory to cell abf files
    if os.path.exists('.DS_Store'): # created by macOS; must not be mistaken for a recording
        os.remove('.DS_Store')

    contents = os.listdir('.') # list all contents in this cell folder
    all_header_files = get_txt_metadata(contents)
    get_csv_metadata(all_header_files)

    # The header CSVs were created AFTER `contents` was listed. On a first run
    # the stale listing contains none of them, so df_prep would find no CSVs.
    # List the folder again so recordings and metadata are paired from what is
    # on disk now.
    refreshed_contents = os.listdir('.')
    abf_files, csv_files = df_prep(mouse,refreshed_contents) #ignore first sweeps that are just an equilibration

    # The stimulus file was already present before metadata generation, so the
    # original listing is sufficient here.
    stim_int = get_stim_currents(mouse,contents)
    summary, all_peak_times = data_summary_percell(abf_files,csv_files,stim_int)

    return summary, all_peak_times
    

In [216]:

# Driver: pick a genotype folder, then for every mouse folder inside it process
# every cell folder and write '<cell>_summary.csv' and '<cell>_timepoints.csv'
# into a '<cell>_processed.csv' directory next to the cell folder.

_dialog_root = Tk()
_dialog_root.withdraw()  # hide the empty Tk root window that otherwise accompanies the dialog
path = askdirectory(title='Select Folder with all data from one genotype') # shows dialog box and return the path
_dialog_root.destroy()

if not path:
    # askdirectory returns an empty string when the dialog is cancelled.
    raise FileNotFoundError('No genotype folder was selected.')
os.chdir(path)

if os.path.exists('.DS_Store'): # created by macOS; must not be treated as a mouse folder
    os.remove('.DS_Store')
gen_dir = os.getcwd()
mouse_folders = os.listdir('.')

for mouse in mouse_folders:
    mouse_dir = os.path.join(gen_dir, mouse) # get directory for one mouse
    if not os.path.isdir(mouse_dir):
        continue  # stray files in the genotype folder would make os.chdir fail
    os.chdir(mouse_dir)  #change directory to that mouse
    if os.path.exists('.DS_Store'):
        os.remove('.DS_Store')
    cwd = os.getcwd()
    print(cwd)
    cell_folders = os.listdir(cwd) # list the cells (directories) of that mouse
    print(cell_folders)
    for cell in cell_folders:

        # summarize_cell_VClamp changes the working directory; go back each time.
        os.chdir(mouse_dir)
        if cell == '.DS_Store':
            if os.path.exists('.DS_Store'):
                os.remove('.DS_Store')
        elif 'cell' in cell and 'processed' not in cell:
            cell_dir = os.path.join(mouse_dir, cell)
            if not os.path.isdir(cell_dir):
                continue  # only folders hold recordings
            processed_file = cell + '_processed.csv'
            processed_dir = os.path.join(cwd, processed_file)

            # Compute first so a failing cell does not leave an empty output folder.
            summary, all_peak_times = summarize_cell_VClamp(mouse,cell_dir)

            # exist_ok: re-running the notebook must not fail because the output
            # folder from a previous run is still there.
            os.makedirs(processed_dir, exist_ok=True)
            summ_path = os.path.join(processed_dir, cell + '_summary.csv')
            tp_path = os.path.join(processed_dir, cell + '_timepoints.csv')
            summary.to_csv(summ_path, index = True)
            all_peak_times.to_csv(tp_path, index = True)



/Users/adrianalejandro/Desktop/Gomez_rotation/sample data/171219_SCH-028953
['028953slice1cell1', '028953slice1cell2']


