In [1]:
import os
import pandas as pd
import numpy as np
import scipy.stats
import matplotlib.pyplot as plt
import seaborn as sns
% matplotlib inline
sns.set_style('whitegrid')

In [2]:
language_1 = [1,2,2,2,1,1,2,1,1,2,1,2,1,1,2,2,1,1,2,1,2,2,1,2,2,2,1,2,1,2,1,1]
language_2 = [1,1,2,1,1,1,2,2,2,2,1,1,1,2,2,1,2,2,1,1,2,1,2,1,2,1,2,1,1,2,2,2]

kids_cols = [
    'rt', 'responses', 'trial_type', 'trial_index', 
    'time_elapsed', 'internal_node_id', 'stimulus', 
    'key_press', 'cond', 'targ', 'value'

]

"""
these require someone to check column headers
# TSL 1, 16, 26, 29, 36, 40, 41
# VSL 3, 4, 7, 16, 30, 34, 36
"""

base_dir ='/home/yoel/Desktop/SL/data_from_server/data_stim_added/'
sub_dirs = 'vsl tsl'.split(' ')

def add_cols(data, directory):
    columns_adults = [
        'rt','responses','trial_type',
        'trial_index','time_elapsed',
        'internal_node_id','stimulus',
        'key_press','cond','targ','value'
    ]
    if directory == 'tsl_adults' or directory == 'vsl_adults':
        data.columns = columns_adults
    return data

data_store = {
    #'tsl_adults': [],
    #'vsl_adults': [],
    'tsl_kids': [],
    'vsl_kids': []
}

for key in data_store.keys():
    if (key == 'tsl_adults' or key == 'vsl_adults'):
        for data in os.listdir(os.path.join(base_dir, key)):
            data_store[key].append(add_cols(
                data=pd.read_csv(os.path.join(base_dir, key, data), header=None),
                directory=key
            ))
    elif (key == 'tsl_kids' or key == 'vsl_kids'):
        for data in os.listdir(os.path.join(base_dir, key)):
            df = pd.read_csv(os.path.join(base_dir, key, data))
            if int(df.ix[0, 1][7:11]) in [3130, 3161, 3224, 3236, 3330, 3331, 3332]:
                df = pd.read_csv(os.path.join(base_dir, key, data), header=None)
                df.columns = kids_cols
            data_store[key].append(df)

In [3]:
def vsl_rt(data):
    """
    first item does not count, c is indexed accordingly
    returns response times as a list of tuples, (n_trial, n_response_time)
    input is single subject data
    """
    c = []
    data.index = [x for x in range(data.shape[0])]
    for row in range(309):
        try:
            if (data.loc[row, 'targ'] in data.loc[row, 'stimulus']) and \
            (int(data.loc[row, 'rt']) != -1) and (int(data.loc[row, 'rt']) < 1000):
                c.append((row, int(data.loc[row, 'rt'])))
        except TypeError:
            pass
    return c

def tsl_rt(data):
    something = []
    data.index = [x for x in range(data.shape[0])]
    for row in range(602):
        try:
            if data.loc[row, 'targ'] in data.loc[row, 'stimulus']:
                if int(data.loc[(row - 2), 'rt']) != -1000:
                    something.append((
                                      row - 2,
                                      int(data.loc[(row - 2), 'rt'])
                                     ))
                elif int(data.loc[(row - 1), 'rt']) != -1000:
                    something.append((
                                      row - 1,
                                      int(data.loc[(row - 1), 'rt'])
                                     ))
                elif int(data.loc[(row), 'rt']) != -1000:
                    something.append((
                                      row,
                                      int(data.loc[row, 'rt'])
                                     ))
                elif int(data.loc[(row + 1), 'rt']) != -1000:
                    something.append((
                                      row + 1,
                                      int(data.loc[(row + 1), 'rt'])
                                     ))
                elif int(data.loc[(row + 2), 'rt']) != -1000:
                    something.append((
                                      row + 2,
                                      int(data.loc[(row + 2), 'rt'])
                                     ))
        except TypeError:
            pass
    return something

def acc_vsl(data):
    language = data.cond[100]
    fc_idx = 309
    trials = []
    res = data.loc[fc_idx:, ['key_press','stimulus']]
    res.index = [x for x in range(res.shape[0])]
    for i in range(res.shape[0]):
        if (int(res.loc[i, 'key_press']) != -1) and (pd.isnull(res.loc[i, 'stimulus'])):
            trials.append(int(res.loc[i, 'key_press']))
    converted = []
    for val in trials:
        if int(val) == 49:
            converted.append(1)
        elif int(val) == 50:
                converted.append(2)
    if language == 'lang1':
        pat = language_1
    elif language == 'lang2':
        pat = language_2   
    return (np.array(converted) == np.array(pat)).mean() 

def acc_tsl(data_frame):
    language = data_frame.cond[100]
    # 37 -> left_arrow, 39 -> right_arrow
    fc_idx = 607
    responses = data_frame.loc[fc_idx:, ['key_press','stimulus']]
    responses.index= [x for x in range(responses.shape[0])]
    idxs = []
    converted = []
    c = 0
    i = 6
    while c < 224:
        idxs.append(i)
        i += 7
        c += 7
    responses = responses.loc[idxs, 'key_press']
    for val in responses:
        if int(val) == 37:
            converted.append(1)
        elif int(val) == 39:
            converted.append(2)
    if language == 'lang1':
        pat = language_1
    elif language == 'lang2':
        pat = language_2
    return (np.array(converted) == np.array(pat)).mean()

In [4]:
def phase1_tbt_vsl(data, par_type):
    """
    return single participant response time
    target trial response time
    """
    if par_type == 'kid':
        vsl_id = data['responses'][0][7:11]
    elif par_type == 'adult':
        vsl_id = data['responses'][0][7:12]
    keep = ['rt', 'id', 'cond', 'targ', 'trial_index', 'trial_mask']
    data.index = [x for x in range(data.shape[0])]
    indexes = []
    for idx in range(309):
        try:
            if data.loc[idx, 'targ'] in data.loc[idx, 'stimulus']:
                if int(data.loc[idx, 'rt']) < 1000 and int(data.loc[idx, 'rt']) != -1:
                    indexes.append(idx)
        except TypeError:
            pass
    new = data.loc[indexes]
    new.index = [x for x in range(new.shape[0])]
    new['id'] = [vsl_id for x in range(new.shape[0])]
    new['trial_mask'] = [x for x in range(new.shape[0])]
    return new[keep]

def phase1_tbt_tsl(data, par_type):
    """
    return single participant response time
    target trial response time
    """
    indexes = []
    if par_type == 'kid':
        tsl_id = data['responses'][0][7:11]
    elif par_type == 'adult':
        tsl_id = data['responses'][0][7:12]
    keep = ['rt', 'id', 'cond', 'targ', 'trial_index', 'trial_mask']
    data.index = [x for x in range(data.shape[0])]
    for row in range(602):
        try:
            if data.loc[row, 'targ'] in data.loc[row, 'stimulus']:
                if int(data.loc[(row - 2), 'rt']) != -1000:
                    indexes.append(row - 2)
                elif int(data.loc[(row - 1), 'rt']) != -1000:
                    indexes.append(row - 1)
                elif int(data.loc[(row), 'rt']) != -1000:
                    indexes.append(row)
                elif int(data.loc[(row + 1), 'rt']) != -1000:
                    indexes.append(row + 1)
                elif int(data.loc[(row + 2), 'rt']) != -1000:
                    indexes.append(row + 2)
        except TypeError:
            pass
    new = data.loc[indexes]
    new.index = [x for x in range(new.shape[0])]
    new['id'] = [tsl_id for x in range(new.shape[0])]
    new['trial_mask'] = [x for x in range(new.shape[0])]
    return new[keep]

In [5]:
vsl_trial_by_trial = []
for par in data_store['vsl_kids']:
    vsl_trial_by_trial.append(phase1_tbt_vsl(par, 'kid'))
pd.concat(vsl_trial_by_trial, axis=0).to_csv('kids_vsl_tbt_famphase.csv', index=None)

#vsl_trial_by_trial = []
#for par in data_store['vsl_adults']:
#    vsl_trial_by_trial.append(phase1_tbt_vsl(par, 'adult'))
#pd.concat(vsl_trial_by_trial, axis=0).to_csv('adults_vsl_tbt_famphase.csv', index=None)

tsl_trial_by_trial = []
for par in data_store['tsl_kids']:
    tsl_trial_by_trial.append(phase1_tbt_tsl(par, 'kid'))
pd.concat(tsl_trial_by_trial, axis=0).to_csv('kids_tsl_tbt_famphase.csv', index=None)

#tsl_trial_by_trial = []
#for par in data_store['tsl_adults']:
#    tsl_trial_by_trial.append(phase1_tbt_tsl(par, 'adult'))
#pd.concat(tsl_trial_by_trial, axis=0).to_csv('adults_tsl_tbt_famphase.csv', index=None)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [6]:
#  trial-by-trial test button pressing data (all trials): to use multilevel modeling to test learning.

def phase2_tbt_vsl(data, par_type):
    if par_type == 'kid':
        vsl_id = data['responses'][0][7:11]
    elif par_type == 'adult':
        vsl_id = data['responses'][0][7:12]
    language = data.cond[100]
    fc_idx = 309
    trials, indexes = [], []
    keep = ['rt', 'cond', 'targ', 'id', 'par_answer', 'expected_answer', 'trial_index', 'trial_mask']
    res = data.loc[fc_idx:, ['key_press','stimulus','rt','cond','targ', 'trial_index']]
    res.index = [x for x in range(res.shape[0])]
    for i in range(res.shape[0]):
        if (int(res.loc[i, 'key_press']) != -1) and (pd.isnull(res.loc[i, 'stimulus'])):
            trials.append(int(res.loc[i, 'key_press']))
            indexes.append(i)
    converted = []
    for val in trials:
        if int(val) == 49:
            converted.append(1)
        elif int(val) == 50:
                converted.append(2)
    if language == 'lang1':
        pat = language_1
    elif language == 'lang2':
        pat = language_2   
    out = res.loc[indexes]
    out['id'] = [vsl_id for i in range(out.shape[0])]
    out['par_answer'] = converted
    out['expected_answer'] = pat
    out['trial_mask'] = [x for x in range(out.shape[0])]
    return out[keep]


def phase2_tbt_tsl(data, par_type):
    if par_type == 'kid':
        tsl_id = data['responses'][0][7:11]
    elif par_type == 'adult':
        tsl_id = data['responses'][0][7:12]
    language = data.cond[100]
    # 37 -> left_arrow, 39 -> right_arrow
    fc_idx = 607
    keep = ['rt', 'cond', 'targ', 'id', 'par_answer', 'expected_answer', 'trial_index', 'trial_mask']
    responses = data.loc[fc_idx:, ['key_press','stimulus','rt','cond','targ', 'trial_index']]
    responses.index= [x for x in range(responses.shape[0])]
    idxs = []
    converted = []
    c = 0
    i = 6
    while c < 224:
        idxs.append(i)
        i += 7
        c += 7
    res = responses.loc[idxs, 'key_press']
    for val in res:
        if int(val) == 37:
            converted.append(1)
        elif int(val) == 39:
            converted.append(2)
    if language == 'lang1':
        pat = language_1
    elif language == 'lang2':
        pat = language_2
    out = responses.loc[idxs]
    out['id'] = [tsl_id for i in range(out.shape[0])]
    out['par_answer'] = converted
    out['expected_answer'] = pat
    out['trial_mask'] = [x for x in range(out.shape[0])]
    return out[keep]

In [7]:
vsl_trial_by_trial_test = []
for par in data_store['vsl_kids']:
    vsl_trial_by_trial_test.append(phase2_tbt_vsl(par, 'kid'))
pd.concat(vsl_trial_by_trial_test, axis=0).to_csv('kids_vsl_tbt_testphase.csv', index=None)

#vsl_trial_by_trial_test = []
#for par in data_store['vsl_adults']:
#    vsl_trial_by_trial_test.append(phase2_tbt_vsl(par, 'adult'))
#pd.concat(vsl_trial_by_trial_test, axis=0).to_csv('adults_vsl_tbt_testphase.csv', index=None)

tsl_trial_by_trial_test = []
for par in data_store['tsl_kids']:
    tsl_trial_by_trial_test.append(phase2_tbt_tsl(par, 'kid'))
pd.concat(tsl_trial_by_trial_test, axis=0).to_csv('kids_tsl_tbt_testphase.csv', index=None)

#tsl_trial_by_trial_test = []
#for par in data_store['tsl_adults']:
#    tsl_trial_by_trial_test.append(phase2_tbt_tsl(par, 'adult'))
#pd.concat(tsl_trial_by_trial_test, axis=0).to_csv('adults_tsl_tbt_testphase.csv', index=None)

In [8]:
def phase1_tbt_vsl_fp(data, par_type):
    if par_type == 'kid':
        vsl_id = data['responses'][0][7:11]
    elif par_type == 'adult':
        vsl_id = data['responses'][0][7:12]
    keep = ['rt', 'id', 'cond', 'targ', 'stimulus', 'key_press', 'trial_index', 'trial_mask']
    data.index = [x for x in range(data.shape[0])]
    indexes = []
    for idx in range(309):
        try:
            if not pd.isnull(data.loc[idx, 'key_press']) and not pd.isnull(data.loc[idx, 'stimulus']) \
            and int(data.loc[idx, 'rt']) < 1200  and int(data.loc[idx, 'rt']) != -1:
                    indexes.append(idx)
        except TypeError:
            pass
    new = data.loc[indexes]
    new.index = [x for x in range(new.shape[0])]
    new['id'] = [vsl_id for x in range(new.shape[0])]
    new['trial_mask'] = [x for x in range(new.shape[0])]
    return new[keep]

def phase1_tbt_tsl_fp(data, par_type):
    indexes = []
    if par_type == 'kid':
        tsl_id = data['responses'][0][7:11]
    elif par_type == 'adult':
        tsl_id = data['responses'][0][7:12]
    keep = ['rt', 'id', 'cond', 'targ', 'stimulus', 'key_press', 'trial_index','trial_mask']
    data.index = [x for x in range(data.shape[0])]
    for row in range(602):
        try:
            if not pd.isnull(data.loc[row, 'key_press']) and int(data.loc[row, 'rt']) != -1000 \
            and not pd.isnull(data.loc[row, 'stimulus']):
                if int(data.loc[(row - 2), 'rt']) != -1000:
                    indexes.append(row - 2)
                elif int(data.loc[(row - 1), 'rt']) != -1000:
                    indexes.append(row - 1)
                elif int(data.loc[(row), 'rt']) != -1000:
                    indexes.append(row)
                elif int(data.loc[(row + 1), 'rt']) != -1000:
                    indexes.append(row + 1)
                elif int(data.loc[(row + 2), 'rt']) != -1000:
                    indexes.append(row + 2)
        except TypeError:
            pass
    new = data.loc[indexes]
    new.index = [x for x in range(new.shape[0])]
    new['id'] = [tsl_id for x in range(new.shape[0])]
    new['trial_mask'] = [x for x in range(new.shape[0])]
    return new[keep]

In [9]:
vsl_trial_by_trial_fp = []
for par in data_store['vsl_kids']:
    vsl_trial_by_trial_fp.append(phase1_tbt_vsl_fp(par, 'kid'))
pd.concat(vsl_trial_by_trial_fp, axis=0).to_csv('kids_vsl_tbt_famphase_falsePos.csv', index=None)

#vsl_trial_by_trial_fp = []
#for par in data_store['vsl_adults']:
#    vsl_trial_by_trial_fp.append(phase1_tbt_vsl_fp(par, 'adult'))
#pd.concat(vsl_trial_by_trial_fp, axis=0).to_csv('adults_vsl_tbt_famphase_falsePos.csv', index=None)

tsl_trial_by_trial_fp = []
for par in data_store['tsl_kids']:
    tsl_trial_by_trial_fp.append(phase1_tbt_tsl_fp(par, 'kid'))
pd.concat(tsl_trial_by_trial_fp, axis=0).to_csv('kids_tsl_tbt_famphase_falsePos.csv', index=None)

#tsl_trial_by_trial_fp = []
#for par in data_store['tsl_adults']:
#    tsl_trial_by_trial_fp.append(phase1_tbt_tsl_fp(par, 'adult'))
#pd.concat(tsl_trial_by_trial_fp, axis=0).to_csv('adults_tsl_tbt_famphase_falsePos.csv', index=None)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
