In [1]:
import datetime
import os

import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# calculate percentage correct
def pc_correct(df):
    """Return the fraction of correct responses among non-correction trials.

    Rows where 'Correction_Trial' equals 0 are treated as main trials. The
    result is the sum of 'Correct_Response' over those rows divided by the
    number of those rows (a fraction; it is not multiplied by 100).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Correction_Trial' and 'Correct_Response' columns.

    Returns
    -------
    float
        The fraction correct, or NaN when ``df`` has no main trials.
    """
    main_trials = df[df['Correction_Trial'] == 0]  # don't look at correction trials
    trial_num = float(len(main_trials))
    if trial_num == 0:
        # No main trials: the ratio is undefined. Return NaN rather than
        # raising ZeroDivisionError so one empty session cannot abort a
        # whole aggregation run.
        return float('nan')
    score_sum = float(main_trials['Correct_Response'].sum())
    return score_sum / trial_num

In [3]:
# calculate sum of correction trials
def cts(df):
    """Return the sum of the 'Correction_Trial' column of ``df``."""
    correction_flags = df['Correction_Trial']
    return correction_flags.sum()

In [4]:
# calculate initiation time
def initiation_time(df):
    """Return the mean of the 'Initiation_Latency' column of ``df``.

    All rows are included; correction trials are not filtered out here.
    """
    latencies = df['Initiation_Latency']
    return latencies.mean()

In [5]:
# calculate total trials without correction trials
def total_trials_no_cts(df):
    """Return, as a float, the number of rows whose 'Correction_Trial' is 0."""
    is_main_trial = df['Correction_Trial'] == 0  # mask out correction trials
    return float(len(df[is_main_trial]))

In [6]:
# Columns used as the initial header of the aggregated DataFrame
# (per-animal / per-session metadata that is kept).
keep_list = [
    "Age (months)",
    "Animal_Id",
    "Cage ID",
    "Chamber",
    "Cohort",
    "Database",
    "Date",
    "Experimenter",
    "Genotype",
    "Group_Id",
    "Housing",
    "Lab site",
    "S_Plus",
    "Schedule",
    "Sequential Day",
    "Session_Id",
    "Sex",
    "Single/Group",
    "Strain",
    "Time",
    "Unique_Animal_ID",
    "Unique_Cohort",
]

# Per-trial columns removed from the representative row before the
# aggregated measures are attached to it.
drop_list = [
    "Unnamed: 0",
    "Back_Beam_Breaks",
    "Check_for_errors",
    "Correct_Response",
    "Correction_Trial",
    "Feeding_Time",
    "Front_Beam_Breaks",
    "ITI_Length",
    "ITI_Touches",
    "Initiation_Latency",
    "Initiation_Period_Touches",
    "Lights off",
    "Post_Stimulus_Touches",
    "Response_Latency",
    "Reward_Collection_Latency",
    "S_Plus_Location",
    "TimeStamp",
    "Time_In_Trial",
    "Time_To_Exit_Tray",
    "Timeout_Length",
    "Tray_Beam_Breaks",
    "Trial_Number",
    "User",
    "Weight",
]

In [7]:
# Day/session-specific columns, removed when aggregating per animal
# rather than per day.
drop_day = [
    "Day",
    "Sequential Day",
    "Session_Id",
    "Time",
]

In [8]:
def df_append(temp_df3,outdf):
    """Join two DataFrames side by side (column-wise) and return the result.

    Rows are aligned on the index, so both frames should share the same
    index labels for their values to land on the same row.
    """
    side_by_side = [temp_df3, outdf]
    return pd.concat(side_by_side, axis=1)

In [9]:
def agg_by_day(agg_df,temp_df1):
    """Add one aggregated row per distinct 'Day' value in ``temp_df1``.

    For every unique value of the 'Day' column, the matching rows are passed
    to ``aggregator`` and its result replaces ``agg_df``. Progress is printed
    on a single line. Returns the accumulated ``agg_df``.
    """
    print("Day: ", end="")
    for current_day in temp_df1['Day'].unique():
        print(" " + str(current_day) + " ", end="")
        same_day = temp_df1['Day'] == current_day
        agg_df = aggregator(agg_df, temp_df1[same_day])
    print("")  # terminate the progress line
    return agg_df

In [10]:
def aggregator(agg_df,temp_df2):
    """Summarise ``temp_df2`` into a single row and add it to ``agg_df``.

    The summary row is built from the first row of ``temp_df2`` with the
    columns in ``drop_list`` removed, joined column-wise with four computed
    measures: 'Percentage Correct', 'Correction Trials', 'Initiation Time'
    and 'Total Trials No CTs'.

    Parameters
    ----------
    agg_df : pandas.DataFrame
        Aggregated rows collected so far.
    temp_df2 : pandas.DataFrame
        Raw trial rows to summarise; must contain every column named in
        ``drop_list`` (``DataFrame.drop`` raises KeyError otherwise).

    Returns
    -------
    pandas.DataFrame
        A new frame holding the rows of ``agg_df`` followed by the summary
        row, with columns sorted (``sort=True``).
    """
    out_data = {'Percentage Correct'    :[pc_correct(temp_df2)],
                'Correction Trials'     :[cts(temp_df2)],
                'Initiation Time'       :[initiation_time(temp_df2)],
                'Total Trials No CTs'   :[total_trials_no_cts(temp_df2)]
               }

    # Use the first row as the carrier of the metadata columns.
    temp_df3 = temp_df2[0:1]
    temp_df3 = temp_df3.drop(drop_list, axis=1)
    temp_df3 = temp_df3.reset_index(drop=True) # index must be 0 to line up with outdf
    outdf = pd.DataFrame(data=out_data)
    toappend = df_append(temp_df3,outdf)
    # DataFrame.append was removed in pandas 2.0; pd.concat with the same
    # sort flag is what append did internally.
    agg_df = pd.concat([agg_df, toappend], sort=True)
    return agg_df

In [11]:
# make a df with aggregated measures
def agg_df_gen(df,byday):
    """Build a DataFrame of aggregated measures from raw trial data.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw trial rows; must contain 'Unique_Animal_ID' (and 'Day' when
        ``byday`` is truthy).
    byday : bool or int
        Truthy: one aggregated row per animal per day (via ``agg_by_day``).
        Falsy: one aggregated row per animal, with the columns listed in
        ``drop_day`` removed from the result.

    Returns
    -------
    pandas.DataFrame
        The aggregated rows. When ``df`` contains no animals the returned
        frame is empty and still has the ``keep_list`` columns.
    """
    agg_df = pd.DataFrame(columns = keep_list) # make new dataframe with old headers

    animals = df['Unique_Animal_ID'].unique()
    for animal in animals:
        print("Aggregating animal: ",animal)
        temp_df1 = df[df['Unique_Animal_ID']==animal]
        if byday:
            agg_df = agg_by_day(agg_df,temp_df1)
        else:
            agg_df = aggregator(agg_df,temp_df1)

    # Drop the per-day columns once, after all animals are aggregated.
    # Doing it inside the loop made every subsequent concat re-add the
    # columns only to drop them again. Skipped when there were no animals,
    # because the empty frame has no 'Day' column to drop.
    if not byday and len(animals) > 0:
        agg_df = agg_df.drop(drop_day, axis=1)

    print("Finished aggregating!")
    return agg_df

In [12]:
def save_df(file,agg_df,byday):
    """Write ``agg_df`` to a CSV file named after the input file.

    The output path is the input path without its extension, followed by
    " - by day - aggregated.csv" when ``byday`` is truthy or
    " - aggregated.csv" otherwise.

    Parameters
    ----------
    file : str
        Path of the raw data file the aggregation was computed from.
    agg_df : pandas.DataFrame
        Aggregated data to save (anything with a ``to_csv(path)`` method).
    byday : bool or int
        Selects the output-name suffix.
    """
    # splitext strips whatever extension is present (or none); slicing off
    # the last four characters only worked for exactly ".csv"-length suffixes.
    stem, _ext = os.path.splitext(file)
    suffix = " - by day - aggregated.csv" if byday else " - aggregated.csv"
    agg_df.to_csv(stem + suffix)
    return

In [25]:
def date_fix(raw_df):
    """Parse the 'Date' column of ``raw_df`` as YYYYMMDD dates.

    The column is converted with ``pd.to_datetime(..., format="%Y%m%d")`` and
    written back to ``raw_df`` itself, so the caller's frame is modified in
    place; the same object is also returned for convenience.

    Parameters
    ----------
    raw_df : pandas.DataFrame
        Frame with a 'Date' column holding dates in YYYYMMDD form.

    Returns
    -------
    pandas.DataFrame
        ``raw_df``, with 'Date' converted to datetime values.
    """
    # The original also called raw_df.Date.astype(str) and discarded the
    # result; that statement had no effect and has been removed.
    raw_df["Date"] = pd.to_datetime(raw_df.Date, format = "%Y%m%d")
    return raw_df

In [1]:
def main():
    """Prompt for a raw data CSV and write both aggregated versions of it.

    Reads the file with pandas, parses its 'Date' column, then produces and
    saves two aggregations next to the input file: one row per animal, and
    one row per animal per day.
    """
    file = input("Raw data file to aggregate please: ")

    print("Aggregating file: ",file)
    raw_df = pd.read_csv(file)

    # fix dates
    raw_df = date_fix(raw_df)

    # by animal (all days pooled) -> "<name> - aggregated.csv"
    byday = False
    agg_df = agg_df_gen(raw_df,byday)
    save_df(file,agg_df,byday)

    # by day (one row per animal per day) -> "<name> - by day - aggregated.csv"
    byday = True
    agg_df = agg_df_gen(raw_df,byday)
    save_df(file,agg_df,byday)
    return

In [None]:
# Run the aggregation; main() prompts for the path of the raw data file.
main()