In [1]:
import pandas as pd
import os
import glob
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split, RandomizedSearchCV
import re

## 1.1 Load Manor Lab Data

In [None]:
# Load the Manor Lab ABR recordings: one CSV per recording, stacked into one frame.
# NOTE(review): absolute, user-specific path -- point this at your local copy of the
# Manor Lab CSVs (ideally through a configurable data directory).
directory = '/Users/abhijeetherra/Documents/USF/MSDS_605/Manor_Lab_Cleaned'
all_files = glob.glob(os.path.join(directory, '*.csv'))

# Skip click-stimulus recordings. Only the file name is tested (not the whole path),
# so a parent folder that happens to contain "click" cannot filter out every file.
filtered_files = [file for file in all_files if 'click' not in os.path.basename(file).lower()]
dfs = []

for filename in filtered_files:
    temp_df = pd.read_csv(filename)
    # Tag every row with its source recording: the base name up to the first '.'.
    temp_df['filename'] = os.path.basename(filename).split('.')[0]
    dfs.append(temp_df)
df = pd.concat(dfs, ignore_index=True)

# Remove columns that are empty in every recording.
df = df.dropna(axis=1, how='all')


In [None]:
# The mouse identifier is the second '_'-separated token of the recording name.
df['mouse'] = df['filename'].map(lambda name: name.split('_')[1])
# Every row loaded so far is labelled as Manor lab data.
df['lab'] = 'manor'

In [None]:
# Find the index of the column named '0'
zero_index = df.columns.get_loc('0')

# Select the desired columns
selected_columns = ['Freq(Hz)', 'Level(dB)'] + list(df.columns[zero_index:zero_index+244])

# Include the 'filename' column at the start
cols = ['lab', 'mouse', 'filename'] + selected_columns
df = df[cols]

In [None]:
# Load the thresholds.csv file
thresholds_path = '/Users/abhijeetherra/Documents/USF/MSDS_605/thresholds.csv'
thresholds = pd.read_csv(thresholds_path)

# Reshape the thresholds DataFrame to long format
thresholds_long = thresholds.melt(id_vars='Mouse ID', var_name='Freq(Hz)', value_name='Level(dB)')

# Filter out any rows where Freq(Hz) is not 'click'
thresholds_long = thresholds_long[thresholds_long['Freq(Hz)'] != 'click']

# Convert Freq(Hz) to float for consistency
thresholds_long['Freq(Hz)'] = thresholds_long['Freq(Hz)'].astype(float)

# Assuming df contains the relevant data with a 'Mouse ID' column
# Merge with the thresholds_long DataFrame on 'Mouse ID' and 'Freq(Hz)'
merged_df = pd.merge(df, thresholds_long, left_on=['filename', 'Freq(Hz)'], 
                     right_on=['Mouse ID', 'Freq(Hz)'], suffixes=('_df', '_thresholds'))

# Create the 'Hearing' column
merged_df['Hearing'] = (merged_df['Level(dB)_thresholds'] <= merged_df['Level(dB)_df']).astype(int)

# Drop the '_thresholds' column and rename '_df' column to 'Level(dB)'
df = merged_df.drop(columns=['Level(dB)_thresholds', 'Mouse ID'])
df = df.rename(columns={'Level(dB)_df': 'Level(dB)'})

# Reorder columns to move 'Hearing' after the first column
cols = df.columns.tolist()
cols.remove('Hearing')
cols.insert(1, 'Hearing')

# Final DataFrame
manor = df[cols]

# Display the first few rows of the final DataFrame
manor.head()

Unnamed: 0,filename,Hearing,Freq(Hz),Level(dB),0,1,2,3,4,5,...,234,235,236,237,238,239,240,241,242,243
0,C1_1466_tone baseline,1,4000.0,90.0,-0.011945,-0.005725,-0.006315,-0.01201,-0.018338,-0.018333,...,-0.145216,-0.149022,-0.152122,-0.150222,-0.137818,-0.112896,-0.083062,-0.057848,-0.040609,-0.027605
1,C1_1466_tone baseline,1,4000.0,85.0,0.048654,0.038216,0.018615,-0.006889,-0.029496,-0.040601,...,-0.082421,-0.052643,-0.023004,0.001246,0.015699,0.019131,0.015952,0.00608,-0.013636,-0.039927
2,C1_1466_tone baseline,1,4000.0,80.0,-0.026901,-0.032692,-0.039477,-0.044559,-0.048618,-0.052769,...,-0.180535,-0.194214,-0.19931,-0.194015,-0.181708,-0.169527,-0.163665,-0.165396,-0.168519,-0.166072
3,C1_1466_tone baseline,1,4000.0,75.0,-0.021238,-0.02943,-0.041387,-0.051669,-0.057654,-0.060579,...,0.069082,0.066161,0.058178,0.048517,0.035875,0.021399,0.010143,0.005516,0.007441,0.011245
4,C1_1466_tone baseline,0,4000.0,70.0,0.033027,0.0523,0.068151,0.075574,0.078719,0.082563,...,0.088556,0.101187,0.110192,0.119786,0.134526,0.147978,0.149203,0.132705,0.106124,0.085295


## 1.2 Load Liberman Lab Data

In [6]:
# NOTE(review): `files` is only assigned in the Liberman loading cell further down, so on a
# fresh kernel (Restart & Run All) this cell raises NameError. It looks like a leftover
# inspection cell -- consider deleting it or moving it below that assignment.
files

[]

In [15]:
def CFTSread(PATH):
    """Parse one ABR text export into a table with one row per stimulus level.

    Header lines are the lines starting with ':' that precede the line containing
    'DATA'. The following header fields are read:

    * 'FREQ:'      -- first token after it; a numeric value is multiplied by 1000 and
      stored under 'Freq(Hz)', a non-numeric value (e.g. a click label) is kept as text.
    * 'LEVELS:'    -- ';'-separated integer stimulus levels (a trailing ';' is optional).
    * 'SAMPLE...:' -- sampling period, used as microseconds when computing the duration.
    * 'Threshold:' -- first number after it, truncated to an integer.

    Every non-blank line after the 'DATA' header is one time sample made of
    whitespace-separated values, one value per stimulus level.

    Parameters
    ----------
    PATH : str
        Path of the file to read (decoded as latin1).

    Returns
    -------
    tuple
        ``(duration_ms, df)`` where ``duration_ms`` is
        ``(n_samples - 1) * sample_us / 1000`` and ``df`` has the columns 'Freq(Hz)',
        'Level(dB)', 'Threshold' followed by the samples in columns '0', '1', ....
        ``(None, None)`` is returned (after printing a message) when the file has no
        usable threshold, whether the value is unparsable or the line is absent.

    Raises
    ------
    ValueError
        If the FREQ, LEVELS or SAMPLE header is missing, or if the data section is
        empty or has fewer columns than there are levels.
    """
    with open(PATH, 'r', encoding='latin1') as file:
        lines = file.readlines()

    freqs = dbs = sample_us = threshold = None
    in_data = False
    samples = []
    for line in lines:
        if not in_data and line.startswith(':'):  # header lines
            if 'FREQ' in line:
                freq_token = line.split('FREQ:')[1].split()[0].strip()
                try:
                    freqs = [float(freq_token) * 1000]
                except ValueError:  # non-numeric stimulus label, e.g. a click
                    freqs = [freq_token]
            if 'LEVELS' in line:
                # Tolerate a missing trailing ';' instead of blindly cutting the last
                # character (which would chop a digit off the final level).
                level_text = line.split('LEVELS:')[1].strip().rstrip(';')
                dbs = [int(token) for token in level_text.split(';') if token.strip()]
            if 'SAMPLE' in line:
                sample_us = float(line.split('SAMPLE')[1].split(':')[1].strip())
            if 'Threshold' in line:
                # extract the numeric part and convert via float (handles '80.0')
                try:
                    threshold_str = re.findall(r'[-+]?\d*\.?\d+', line.split('Threshold:')[1])[0]
                    threshold = int(float(threshold_str))
                except (IndexError, ValueError):
                    print(f"No threshold found for file {PATH}, skipping")
                    return None, None
            if 'DATA' in line:
                in_data = True
        elif in_data:
            if not line.strip():
                continue
            samples.append([float(value) for value in line.split()])

    # A file without any Threshold header is skipped exactly like one whose threshold
    # value cannot be parsed (previously this surfaced later as a NameError).
    if threshold is None:
        print(f"No threshold found for file {PATH}, skipping")
        return None, None

    missing = [name for name, value in (('FREQ', freqs), ('LEVELS', dbs), ('SAMPLE', sample_us))
               if value is None]
    if missing:
        raise ValueError(f"{PATH}: missing header field(s): {', '.join(missing)}")

    data = np.array(samples)
    if data.ndim != 2 or data.shape[1] < len(dbs):
        raise ValueError(f"{PATH}: data section is empty or has fewer columns than levels")

    duration_ms = (data.shape[0] - 1) * sample_us / 1000

    # One row per stimulus level; column k of the data block belongs to level k.
    sample_labels = [str(i) for i in range(data.shape[0])]
    rows = [
        {'Freq(Hz)': freqs[0], 'Level(dB)': db, 'Threshold': threshold,
         **dict(zip(sample_labels, data[:, level_idx]))}
        for level_idx, db in enumerate(dbs)
    ]
    # Build the frame once (explicit columns keep the schema even with no levels).
    df = pd.DataFrame(rows, columns=['Freq(Hz)', 'Level(dB)', 'Threshold', *sample_labels])
    return duration_ms, df

In [20]:
# Collect every ABR*.tsv export found under the WPZ* folders (searched recursively).
files = glob.glob('../../liberman_wpz/WPZ Electrophysiology/WPZ**/ABR*.tsv', recursive=True)

meta_cols = ['lab', 'mouse', 'filename', 'Hearing']

# Gather one frame per file and concatenate once at the end; growing a DataFrame with
# pd.concat inside the loop re-copies all previous rows on every iteration.
liberman_frames = []

for filename in files:
    duration_ms, df = CFTSread(filename)
    if df is None:  # file without a usable threshold
        continue
    # The parent folder name identifies the mouse.
    file_dir = os.path.basename(os.path.dirname(filename))
    df['mouse'] = file_dir
    df['filename'] = os.path.join(file_dir, os.path.basename(filename))
    df['lab'] = 'liberman'
    # Hearing = 1 when the stimulus level is at or above the file's threshold.
    df['Hearing'] = (df['Level(dB)'] >= df['Threshold']).astype(int)

    # Metadata columns first, everything else in its existing order.
    df = df[meta_cols + [col for col in df.columns if col not in meta_cols]]
    liberman_frames.append(df.drop(columns=['Threshold']))

# pd.concat rejects an empty list, so fall back to an empty frame when nothing was read.
liberman = pd.concat(liberman_frames, ignore_index=True) if liberman_frames else pd.DataFrame()
liberman


Unnamed: 0,lab,mouse,filename,Hearing,Freq(Hz),Level(dB),0,1,2,3,...,416,417,418,419,420,421,422,423,424,425
0,liberman,WPZ145,WPZ145/ABR-145-L-45.2.tsv,0,45200.0,70,-0.112169,-0.113246,-0.122275,-0.124777,...,0.010579,-0.008650,0.012416,0.025500,0.029871,0.010801,0.001044,-0.018724,-0.001364,-0.001554
1,liberman,WPZ145,WPZ145/ABR-145-L-45.2.tsv,0,45200.0,75,-0.111887,-0.099311,-0.071845,-0.035003,...,-0.080262,-0.079439,-0.110801,-0.129903,-0.123694,-0.104307,-0.091984,-0.102057,-0.086123,-0.041931
2,liberman,WPZ145,WPZ145/ABR-145-L-45.2.tsv,1,45200.0,80,-0.021953,-0.030063,-0.014731,-0.015047,...,0.060154,0.061517,0.039880,0.041021,0.031961,0.011053,-0.011376,0.000092,0.031549,0.021538
3,liberman,WPZ145,WPZ145/ABR-145-L-8.0.tsv,0,8000.0,30,0.108078,0.137191,0.130601,0.102756,...,-0.029211,-0.001429,0.013555,0.031359,0.043111,0.061010,0.068961,0.077736,0.094652,0.113089
4,liberman,WPZ145,WPZ145/ABR-145-L-8.0.tsv,1,8000.0,35,-0.089483,-0.066167,-0.058152,-0.077540,...,0.076757,0.093420,0.093515,0.108784,0.106345,0.110780,0.115944,0.099756,0.112776,0.124022
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7326,liberman,WPZ101,WPZ101/ABR-101-L-32.0.tsv,1,32000.0,40,-0.025986,-0.022818,-0.036123,-0.038784,...,0.138356,0.161355,0.139877,0.143805,0.178145,0.187902,0.204565,0.181629,0.154576,0.186888
7327,liberman,WPZ101,WPZ101/ABR-101-L-32.0.tsv,1,32000.0,50,0.042785,-0.001438,-0.021332,-0.015884,...,-0.036145,-0.034245,-0.033358,-0.041214,0.004150,0.030126,0.012133,-0.051098,-0.086324,-0.009662
7328,liberman,WPZ101,WPZ101/ABR-101-L-32.0.tsv,1,32000.0,60,-0.048551,-0.010029,-0.024982,-0.006735,...,0.161555,0.175620,0.185757,0.158007,0.132157,0.142041,0.120372,0.109095,0.102759,0.070447
7329,liberman,WPZ101,WPZ101/ABR-101-L-32.0.tsv,1,32000.0,70,-0.027782,-0.022587,-0.063262,-0.065163,...,0.084157,0.095055,0.093661,0.089606,0.076048,0.078202,0.047410,0.022574,0.058054,0.077948


In [21]:
from scipy.interpolate import CubicSpline

def interpolate_and_smooth(y, target_length=244):
    """Resample ``y`` onto ``target_length`` evenly spaced points and return a Series.

    Input and output samples are both placed on an even grid over [0, 1]:

    * same length   -- the values are returned unchanged (wrapped in a Series);
    * longer input  -- linear interpolation (``np.interp``);
    * shorter input -- cubic-spline interpolation (``CubicSpline``).

    Despite the name, no smoothing step is applied.
    """
    n_samples = len(y)
    source_grid = np.linspace(0, 1, n_samples)
    target_grid = np.linspace(0, 1, target_length)

    if n_samples == target_length:
        return pd.Series(y)

    if n_samples > target_length:
        downsampled = np.interp(target_grid, source_grid, y).astype(float)
        return pd.Series(downsampled)

    spline = CubicSpline(source_grid, y)
    return pd.Series(spline(target_grid))

# Split the columns into waveform samples (purely numeric labels) and everything else.
new_df = pd.DataFrame()
wave_cols = [col for col in liberman.columns if col.isnumeric()]
non_wave_cols = [col for col in liberman.columns if col not in wave_cols]

all_rows = []

for index, row in liberman.iterrows():
    # Everything from column '0' onwards is taken as this recording's waveform.
    wave_data = liberman.loc[index, '0':].astype(float)
    # Keep the leading 10/17 of the samples before resampling to 244 points.
    # NOTE(review): presumably the first 10 ms of a 17 ms sweep -- confirm.
    tenms = int((10/17)*len(wave_data))
    final = interpolate_and_smooth(wave_data[:tenms], 244)

    # Metadata of the row followed by the 244 resampled values under labels '0'..'243'.
    non_wave_data = row.drop(wave_cols)
    row_dict = {'lab': 'liberman', **dict(non_wave_data)}
    row_dict.update({str(i): final.iloc[i] for i in range(244)})

    all_rows.append(row_dict)

new_df = pd.DataFrame(all_rows)

# NOTE: `liberman` is replaced by its resampled version, so this cell is not idempotent.
liberman = new_df
liberman.head()

Unnamed: 0,lab,mouse,filename,Hearing,Freq(Hz),Level(dB),0,1,2,3,...,234,235,236,237,238,239,240,241,242,243
0,liberman,WPZ145,WPZ145/ABR-145-L-45.2.tsv,0,45200.0,70,-0.112169,-0.113469,-0.122399,-0.122348,...,-0.015834,-0.007148,-0.001957,-0.007048,0.013305,-0.013465,-0.030808,-0.008957,0.006171,0.018309
1,liberman,WPZ145,WPZ145/ABR-145-L-45.2.tsv,0,45200.0,75,-0.111887,-0.098633,-0.070026,-0.033426,...,-0.125784,-0.140849,-0.127058,-0.123276,-0.157175,-0.155424,-0.131359,-0.090885,-0.054182,-0.088277
2,liberman,WPZ145,WPZ145/ABR-145-L-45.2.tsv,1,45200.0,80,-0.021953,-0.029684,-0.014747,-0.014078,...,-0.023139,-0.025058,-0.016326,-0.002217,-0.03555,-0.072213,-0.073718,-0.059498,-0.033153,-0.046919
3,liberman,WPZ145,WPZ145/ABR-145-L-8.0.tsv,0,8000.0,30,0.108078,0.137028,0.129226,0.100679,...,-0.028141,-0.032051,-0.033407,-0.04765,-0.076915,-0.081545,-0.069232,-0.075617,-0.061973,-0.060731
4,liberman,WPZ145,WPZ145/ABR-145-L-8.0.tsv,1,8000.0,35,-0.089483,-0.065969,-0.059109,-0.077054,...,0.086141,0.076618,0.060576,0.051416,0.073083,0.099807,0.100479,0.111378,0.10877,0.110432


## 1.3 Load Marcotti Lab Data

In [None]:
import pandas as pd
import os

# Read every CSV in the 'Marcotti_ABR' folder and tag its rows with the source file name.
# The frames are collected in a list and concatenated once; concatenating inside the
# loop re-copies all previously loaded rows on every iteration.
marcotti_frames = []

for filename in os.listdir('Marcotti_ABR'):
    if not filename.endswith('.csv'):
        continue
    df = pd.read_csv(os.path.join('Marcotti_ABR', filename))

    # Source file name as the first column.
    df.insert(0, 'filename', filename)
    marcotti_frames.append(df)

# ignore_index=True yields a fresh 0..n-1 index; pd.concat rejects an empty list, so
# fall back to an empty frame when the folder holds no CSV files.
all_data = pd.concat(marcotti_frames, ignore_index=True) if marcotti_frames else pd.DataFrame()

In [None]:
# Strip only a trailing '.csv' extension. The pattern is an explicit, anchored regex so the
# result does not depend on the pandas-version-specific default of `regex` (under
# regex=True the unescaped '.' in '.csv' would match any character).
all_data['filename'] = all_data['filename'].str.replace(r'\.csv$', '', regex=True)

In [None]:
# Load the threshold table and expose its 'Mouse ID' column under the name 'filename'.
thresholds = pd.read_csv('thresholds.csv').rename(columns={'Mouse ID': 'filename'})

In [None]:
import pandas as pd

# Attach a threshold to every recording row of all_data, matching on
# ('filename', 'Freq(Hz)').

# Melt the thresholds table to long format: one row per (filename, frequency),
# with the value stored in the 'threshold' column.
thresholds_melt = thresholds.melt(id_vars='filename', var_name='Freq(Hz)', value_name='threshold')

# After the melt the frequency labels are the former column names; convert them to
# numbers. pd.to_numeric raises on a label that is not numeric.
thresholds_melt['Freq(Hz)'] = pd.to_numeric(thresholds_melt['Freq(Hz)'])

# Make the 'filename' key a string on both sides so the merge keys compare equal.
all_data['filename'] = all_data['filename'].astype(str)
thresholds_melt['filename'] = thresholds_melt['filename'].astype(str)

# Left merge: every recording row is kept; rows without a match get a missing threshold.
all_data = pd.merge(all_data, thresholds_melt, on=['filename', 'Freq(Hz)'], how='left')


# NOTE(review): this repeats the merge above. Because 'threshold' now exists on both
# sides, the default merge suffixes produce 'threshold_x' and 'threshold_y'. Later
# cells refer to 'threshold_x', so this repetition cannot be removed on its own.
all_data = pd.merge(all_data, thresholds_melt, on=['filename', 'Freq(Hz)'], how='left')


In [None]:
# Drop the right-most column.
# NOTE(review): after the repeated threshold merge this is presumably 'threshold_y' -- confirm.
# This cell is positional and not idempotent: each re-run removes one more column.
all_data = all_data.iloc[:, :all_data.shape[1] - 1]

In [None]:
# Move the last column to the second position, leaving the other columns in order.
cols = all_data.columns.tolist()
last_col = cols.pop()
cols.insert(1, last_col)
all_data = all_data[cols]

In [None]:
# Hearing = 1 when the stimulus level is at or above the threshold, otherwise 0 (a missing
# threshold compares False and therefore gives 0). Computed column-wise rather than with
# a row-by-row `apply`, which is very slow on a frame this wide and gives the same labels.
hearing = (all_data['Level(dB)'] >= all_data['threshold_x']).astype(int)

# Put the label where 'threshold_x' sits, then drop the threshold column itself.
all_data.insert(all_data.columns.get_loc('threshold_x'), 'Hearing', hearing)
all_data = all_data.drop(columns=['threshold_x'])

In [None]:
# Keep the label-based column range from 'filename' through '976' (both ends included).
column_span = slice('filename', '976')
all_data = all_data.loc[:, column_span]

In [None]:
# Display the labelled Marcotti frame (the notebook shows a truncated view).
all_data

Unnamed: 0,filename,Hearing,Freq(Hz),Level(dB),0,1,2,3,4,5,...,967,968,969,970,971,972,973,974,975,976
0,97,0,100.0,0.0,0.033791,0.039729,0.047360,0.056843,0.065612,0.070652,...,0.348105,0.341615,0.333120,0.324244,0.316473,0.310317,0.305540,0.301918,0.299239,0.296636
1,97,0,100.0,5.0,0.265480,0.272649,0.276348,0.275191,0.269755,0.262055,...,-0.128397,-0.130201,-0.130415,-0.130186,-0.132303,-0.137713,-0.142897,-0.145481,-0.148816,-0.151563
2,97,0,100.0,10.0,0.037163,0.024569,0.013040,0.003256,-0.003927,-0.007555,...,0.137230,0.141710,0.143284,0.142663,0.141399,0.139028,0.133610,0.124458,0.111841,0.098145
3,97,0,100.0,15.0,-0.019905,-0.026373,-0.033627,-0.038605,-0.039325,-0.034740,...,0.070884,0.086998,0.098786,0.106550,0.110857,0.112451,0.111858,0.109425,0.104791,0.097083
4,97,0,100.0,20.0,0.160114,0.161109,0.161414,0.161318,0.160955,0.159810,...,-0.088411,-0.090558,-0.092034,-0.096604,-0.101225,-0.104872,-0.106351,-0.104073,-0.097163,-0.085631
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16221,71,1,36000.0,95.0,-0.118377,-0.119035,-0.118623,-0.118036,-0.117730,-0.117819,...,0.259145,0.262498,0.271489,0.284537,0.297751,0.308969,0.317394,0.322188,0.322519,0.319128
16222,71,1,30000.0,95.0,0.082913,0.088466,0.096360,0.105286,0.113020,0.116751,...,-0.267053,-0.263192,-0.261740,-0.265535,-0.271114,-0.278002,-0.285858,-0.295151,-0.306930,-0.320709
16223,71,1,24000.0,95.0,-0.094127,-0.090920,-0.084665,-0.076653,-0.066335,-0.055525,...,-0.820263,-0.816483,-0.812340,-0.807451,-0.800046,-0.789668,-0.777439,-0.765569,-0.755071,-0.746031
16224,71,1,18000.0,95.0,-0.232872,-0.223349,-0.213287,-0.202847,-0.190474,-0.177575,...,-0.718762,-0.724659,-0.726310,-0.723738,-0.718188,-0.710650,-0.701895,-0.691927,-0.680184,-0.665809


In [None]:
# Labels of the waveform columns to resample: '0' .. '975'.
# NOTE(review): all_data is sliced through column '976' earlier, so that column is not
# part of wave_cols -- confirm whether range(977) was intended.
wave_cols = list(map(str, range(976)))
# 244 evenly spaced resampling positions from 0 to 976 (both ends included).
new_points = np.linspace(0, 976, num=244)

# Empty frame that the resampling loop appends to.
new_df = pd.DataFrame()

# Sanity check: number of output samples per waveform.
print(len(new_points))


244


In [None]:
# Resample every waveform onto `new_points` by linear interpolation and rebuild the frame
# in one step. The previous version concatenated one single-row frame per recording
# (quadratic in the number of rows) and appended to whatever `new_df` already held, so
# re-running the cell duplicated rows; here `new_df` is always built from scratch.
wave_values = all_data[wave_cols].to_numpy(dtype=float)
sample_positions = np.arange(976)

interpolated = np.empty((len(wave_values), len(new_points)))
for row_idx, waveform in enumerate(wave_values):
    interpolated[row_idx] = np.interp(new_points, sample_positions, waveform)

# Non-waveform columns first, then the resampled samples. ignore_index=True relabels the
# columns 0..n-1, matching the positional labels the following cells expect.
non_wave_frame = all_data.drop(columns=wave_cols).reset_index(drop=True)
new_df = pd.concat([non_wave_frame, pd.DataFrame(interpolated)], axis=1, ignore_index=True)

In [None]:
# Display the resampled frame; its columns still carry positional labels at this point.
new_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,239,240,241,242,243,244,245,246,247,248
0,97,0,100.0,0.0,0.296636,0.033791,0.065695,0.066535,0.051126,0.041897,...,0.118143,0.174712,0.236451,0.271457,0.32267,0.347076,0.348439,0.341828,0.310418,0.299239
1,97,0,100.0,5.0,-0.151563,0.26548,0.269628,0.245469,0.226786,0.204498,...,-0.018511,0.009673,-0.015604,-0.054524,-0.086791,-0.119809,-0.123298,-0.130141,-0.137624,-0.148816
2,97,0,100.0,10.0,0.098145,0.037163,-0.003987,-0.003464,0.007656,0.020094,...,0.026247,0.036444,0.039849,0.059662,0.103865,0.114026,0.114591,0.141563,0.139067,0.111841
3,97,0,100.0,15.0,0.097083,-0.019905,-0.039249,-0.000669,0.016501,0.005105,...,-0.200474,-0.196833,-0.203717,-0.233226,-0.21554,-0.125194,-0.004055,0.086468,0.112425,0.104791
4,97,0,100.0,20.0,-0.085631,0.160114,0.160936,0.146369,0.129469,0.118032,...,0.033685,0.09628,0.132701,0.094899,0.047131,-0.010179,-0.065745,-0.090487,-0.104812,-0.097163
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16221,71,1,36000.0,95.0,0.319128,-0.118377,-0.117731,-0.125463,-0.152373,-0.198541,...,0.523606,0.503557,0.483672,0.428102,0.363084,0.319916,0.272272,0.262388,0.308785,0.322519
16222,71,1,30000.0,95.0,-0.320709,0.082913,0.113082,0.118743,0.117828,0.100142,...,-0.013549,-0.040811,-0.071388,-0.116064,-0.158196,-0.221033,-0.265463,-0.263319,-0.277889,-0.30693
16223,71,1,24000.0,95.0,-0.746031,-0.094127,-0.066157,-0.037672,-0.032352,0.004445,...,-0.782004,-0.829512,-0.866106,-0.900933,-0.896161,-0.852806,-0.827938,-0.816608,-0.789839,-0.755071
16224,71,1,18000.0,95.0,-0.665809,-0.232872,-0.190261,-0.138403,-0.112954,-0.11619,...,-0.460814,-0.497342,-0.590164,-0.625124,-0.649152,-0.668249,-0.690185,-0.724465,-0.710774,-0.680184


In [None]:
# Name the four leading metadata columns; the remaining labels are kept as they are.
meta_names = ["filename", "Hearing", "Freq(Hz)", "Level(dB)"]
new_df.columns = meta_names + new_df.columns[4:].tolist()

# Drop the single column that follows "Level(dB)" (position 4).
# NOTE(review): presumably the leftover '976' sample that wave_cols did not cover -- confirm.
new_df = new_df.drop(columns=[new_df.columns[4]])


In [None]:
# Relabel: four metadata names, then consecutive integers for the remaining columns.
n_wave = len(new_df.columns) - 4
new_df.columns = ['filename', 'Hearing', 'Freq(Hz)', 'Level(dB)', *range(n_wave)]

In [None]:
# Display the resampled frame after the metadata columns were named.
new_df

Unnamed: 0,filename,Hearing,Freq(Hz),Level(dB),0,1,2,3,4,5,...,234,235,236,237,238,239,240,241,242,243
0,97,0,100.0,0.0,0.033791,0.065695,0.066535,0.051126,0.041897,0.03876,...,0.118143,0.174712,0.236451,0.271457,0.32267,0.347076,0.348439,0.341828,0.310418,0.299239
1,97,0,100.0,5.0,0.26548,0.269628,0.245469,0.226786,0.204498,0.201019,...,-0.018511,0.009673,-0.015604,-0.054524,-0.086791,-0.119809,-0.123298,-0.130141,-0.137624,-0.148816
2,97,0,100.0,10.0,0.037163,-0.003987,-0.003464,0.007656,0.020094,0.008527,...,0.026247,0.036444,0.039849,0.059662,0.103865,0.114026,0.114591,0.141563,0.139067,0.111841
3,97,0,100.0,15.0,-0.019905,-0.039249,-0.000669,0.016501,0.005105,0.018298,...,-0.200474,-0.196833,-0.203717,-0.233226,-0.21554,-0.125194,-0.004055,0.086468,0.112425,0.104791
4,97,0,100.0,20.0,0.160114,0.160936,0.146369,0.129469,0.118032,0.102733,...,0.033685,0.09628,0.132701,0.094899,0.047131,-0.010179,-0.065745,-0.090487,-0.104812,-0.097163
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16221,71,1,36000.0,95.0,-0.118377,-0.117731,-0.125463,-0.152373,-0.198541,-0.197439,...,0.523606,0.503557,0.483672,0.428102,0.363084,0.319916,0.272272,0.262388,0.308785,0.322519
16222,71,1,30000.0,95.0,0.082913,0.113082,0.118743,0.117828,0.100142,0.069171,...,-0.013549,-0.040811,-0.071388,-0.116064,-0.158196,-0.221033,-0.265463,-0.263319,-0.277889,-0.30693
16223,71,1,24000.0,95.0,-0.094127,-0.066157,-0.037672,-0.032352,0.004445,0.059592,...,-0.782004,-0.829512,-0.866106,-0.900933,-0.896161,-0.852806,-0.827938,-0.816608,-0.789839,-0.755071
16224,71,1,18000.0,95.0,-0.232872,-0.190261,-0.138403,-0.112954,-0.11619,-0.104785,...,-0.460814,-0.497342,-0.590164,-0.625124,-0.649152,-0.668249,-0.690185,-0.724465,-0.710774,-0.680184


In [None]:
# Final labels: four metadata names followed by the sample labels '0'..'243' as strings.
non_wave_cols = ['filename', 'Hearing', 'Freq(Hz)', 'Level(dB)']
new_cols = [*non_wave_cols, *map(str, range(244))]
new_df.columns = new_cols

In [None]:
# The file name (as a string) doubles as the mouse identifier for this lab.
new_df['mouse'] = new_df['filename'].astype(str)
new_df['lab'] = 'marcotti'

# Put 'lab' and 'mouse' in front, keeping the other columns in their current order.
leading = ['lab', 'mouse']
cols = leading + [col for col in new_df.columns.tolist() if col not in leading]
new_df = new_df[cols]
marcotti = new_df

## 1.4 Combine Liberman, Manor, Marcotti Data

In [None]:
# Stack the three labs; columns are aligned by name. ignore_index=True gives the combined
# frame a unique 0..n-1 index instead of repeating each lab's own row labels.
df = pd.concat([manor, liberman, marcotti], ignore_index=True)

In [None]:
# Normalise the waveform column labels to plain integer strings and store the samples as
# floats. Waveform columns are recognised by their label (anything int() accepts) rather
# than by position: the frame carries more than four metadata columns, so the earlier
# positional split `columns[4:]` would try int('Freq(Hz)') and raise ValueError.
wave_labels = {}
for col in df.columns:
    try:
        wave_labels[col] = str(int(col))
    except (TypeError, ValueError):
        continue  # metadata column such as 'lab' or 'Freq(Hz)'

df = df.rename(columns=wave_labels)
df = df.astype({label: float for label in wave_labels.values()})

In [None]:
# Write the combined data set next to the notebook, without the row index.
output_csv = 'liberman_manor_marcotti.csv'
df.to_csv(output_csv, index=False)