In [None]:
import pandas as pd
import os
import glob
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split, RandomizedSearchCV
import re

## 1.1 Load Manor Lab Data

In [None]:
# Read every Manor Lab CSV whose path does not contain "click"
# (case-insensitive) and stack them into one frame.
# NOTE(review): `directory` is an absolute, user-specific path; point it at
# your own copy of the Manor Lab data before running.
directory = '/Users/abhijeetherra/Documents/USF/MSDS_605/Manor_Lab_Cleaned'
all_files = glob.glob(os.path.join(directory, '*.csv'))
filtered_files = [path for path in all_files if 'click' not in path.lower()]

# Tag each frame with its source file name (text before the first '.')
# so rows can later be matched against the thresholds table.
dfs = [
    pd.read_csv(path).assign(filename=os.path.basename(path).split('.')[0])
    for path in filtered_files
]

# Stack all recordings, then discard columns that are empty in every row.
df = pd.concat(dfs, ignore_index=True).dropna(axis=1, how='all')



In [None]:
# Position of the column labelled '0'; the 244 columns starting there are
# kept as the waveform samples.
zero_index = df.columns.get_loc('0')
wave_columns = list(df.columns[zero_index:zero_index + 244])

# Stimulus metadata followed by the waveform samples.
selected_columns = ['Freq(Hz)', 'Level(dB)', *wave_columns]

# 'filename' leads so every row stays traceable to its source file.
cols = ['filename', *selected_columns]
df = df[cols]

In [None]:
# Label each Manor recording with a binary 'Hearing' flag by comparing its
# stimulus level against the per-mouse, per-frequency threshold table.
# NOTE(review): absolute, user-specific path; adjust before running.
thresholds_path = '/Users/abhijeetherra/Documents/USF/MSDS_605/thresholds.csv'
thresholds = pd.read_csv(thresholds_path)

# Wide -> long: one row per (Mouse ID, frequency). The 'click' column is
# dropped (only tone frequencies are kept), after which the remaining
# frequency labels are cast to float so they can be merged against
# df['Freq(Hz)']. Built as a single chain so no column is assigned on a
# filtered slice (which raised SettingWithCopyWarning).
thresholds_long = (
    thresholds
    .melt(id_vars='Mouse ID', var_name='Freq(Hz)', value_name='Level(dB)')
    .loc[lambda long: long['Freq(Hz)'] != 'click']
    .astype({'Freq(Hz)': float})
)

# Inner merge: the recording's 'filename' is matched against 'Mouse ID'.
# Both frames carry 'Level(dB)', hence the suffixes.
merged_df = pd.merge(
    df,
    thresholds_long,
    left_on=['filename', 'Freq(Hz)'],
    right_on=['Mouse ID', 'Freq(Hz)'],
    suffixes=('_df', '_thresholds'),
)

# Hearing = 1 when the stimulus level is at or above the threshold.
merged_df['Hearing'] = (
    merged_df['Level(dB)_thresholds'] <= merged_df['Level(dB)_df']
).astype(int)

# Keep only the recording's own level, under its original name.
df = (
    merged_df
    .drop(columns=['Level(dB)_thresholds', 'Mouse ID'])
    .rename(columns={'Level(dB)_df': 'Level(dB)'})
)

# Move 'Hearing' to the second position, right after 'filename'.
cols = [label for label in df.columns if label != 'Hearing']
cols.insert(1, 'Hearing')

# Final Manor frame
manor = df[cols]

# Display the first few rows of the final DataFrame
manor.head()

Unnamed: 0,filename,Hearing,Freq(Hz),Level(dB),0,1,2,3,4,5,...,234,235,236,237,238,239,240,241,242,243
0,C1_1466_tone baseline,1,4000.0,90.0,-0.011945,-0.005725,-0.006315,-0.01201,-0.018338,-0.018333,...,-0.145216,-0.149022,-0.152122,-0.150222,-0.137818,-0.112896,-0.083062,-0.057848,-0.040609,-0.027605
1,C1_1466_tone baseline,1,4000.0,85.0,0.048654,0.038216,0.018615,-0.006889,-0.029496,-0.040601,...,-0.082421,-0.052643,-0.023004,0.001246,0.015699,0.019131,0.015952,0.00608,-0.013636,-0.039927
2,C1_1466_tone baseline,1,4000.0,80.0,-0.026901,-0.032692,-0.039477,-0.044559,-0.048618,-0.052769,...,-0.180535,-0.194214,-0.19931,-0.194015,-0.181708,-0.169527,-0.163665,-0.165396,-0.168519,-0.166072
3,C1_1466_tone baseline,1,4000.0,75.0,-0.021238,-0.02943,-0.041387,-0.051669,-0.057654,-0.060579,...,0.069082,0.066161,0.058178,0.048517,0.035875,0.021399,0.010143,0.005516,0.007441,0.011245
4,C1_1466_tone baseline,0,4000.0,70.0,0.033027,0.0523,0.068151,0.075574,0.078719,0.082563,...,0.088556,0.101187,0.110192,0.119786,0.134526,0.147978,0.149203,0.132705,0.106124,0.085295


## 1.2 Load Liberman Lab Data

In [None]:
# Liberman Lab loader.
#
# Each matching TSV is parsed into one row per stimulus level with the columns
# filename, Hearing, Freq(Hz), Level(dB) followed by 244 waveform samples
# labelled '0'..'243'. Files that cannot be parsed are reported and skipped.

# Number of leading samples kept from each raw waveform, and the number of
# points each waveform is linearly resampled to.
LIBERMAN_RAW_POINTS = 250
LIBERMAN_TARGET_POINTS = 244
LIBERMAN_META_COLS = ['filename', 'Hearing', 'Freq(Hz)', 'Level(dB)']


def extract_filename(full_path):
    """Return the ID taken from the name of the folder containing `full_path`.

    The ID is the parent directory's name up to (not including) its first '-'.
    Uses os.path so the platform's path separator is honoured instead of a
    hard-coded '/'.
    """
    folder = os.path.basename(os.path.dirname(full_path))
    return folder.split('-')[0]


def parse_liberman_threshold(cell):
    """Parse the threshold value from the header cell text.

    The number is read from whatever follows the last ':' in `cell`, so
    values of any width parse (the previous fixed four-character slice failed
    on text ending in ': 30'). If that text is not a number, the original
    last-four-characters rule is applied as a fallback.

    Raises:
        ValueError: if neither rule yields a number.
    """
    tail = cell.rsplit(':', 1)[-1].strip()
    try:
        return float(tail)
    except ValueError:
        return float(cell[-4:])


def read_liberman_table(path):
    """Read one TSV into a DataFrame of strings (one row per '\\n' line).

    Byte 0xB5 is replaced by a space before decoding as UTF-8. Lines are split
    on '\\n' only, because the parser below relies on '\\r' characters that
    remain inside cells. Everything happens in memory; no scratch files are
    written.
    """
    with open(path, 'rb') as f:
        content = f.read()
    text = content.replace(b'\xb5', b' ').decode('utf-8')

    lines = text.split('\n')
    if lines[-1] == '':
        # A trailing newline does not start another line.
        lines.pop()
    return pd.DataFrame([line.strip().split('\t') for line in lines])


def resample_liberman_waves(table):
    """Linearly resample each row's waveform to LIBERMAN_TARGET_POINTS points.

    `table` must have string column labels '0', '1', ...: the first four are
    metadata and the next LIBERMAN_RAW_POINTS are waveform samples; any
    further columns are discarded.

    Returns a new object-dtype frame with columns LIBERMAN_META_COLS followed
    by '0'..str(LIBERMAN_TARGET_POINTS - 1).

    Raises:
        KeyError: if a row has fewer than LIBERMAN_RAW_POINTS sample columns.
        ValueError: if `table` has no rows (column labels cannot be assigned).
    """
    table = table.iloc[:, :4 + LIBERMAN_RAW_POINTS]
    wave_cols = [str(i) for i in range(4, 4 + LIBERMAN_RAW_POINTS)]
    new_points = np.linspace(0, LIBERMAN_RAW_POINTS - 1, LIBERMAN_TARGET_POINTS)
    sample_positions = np.arange(LIBERMAN_RAW_POINTS)

    # Collect plain rows and build the frame once instead of concatenating
    # one single-row frame per iteration.
    records = []
    for _, row in table.iterrows():
        wave_data = row[wave_cols].astype(float)
        non_wave_data = row.drop(wave_cols)
        interpolated = np.interp(new_points, sample_positions, wave_data)
        records.append(list(non_wave_data) + list(interpolated))

    resampled = pd.DataFrame(records, dtype=object)
    resampled.columns = LIBERMAN_META_COLS + [
        str(i) for i in range(LIBERMAN_TARGET_POINTS)
    ]
    return resampled


def load_liberman_file(path):
    """Parse one Liberman ABR TSV into a labelled, resampled DataFrame.

    Reads the frequency from header cell (0, 6) (first decimal number, times
    1000) and the threshold from header cell (0, 10). The stimulus levels are
    the ';'-separated integers after the last ':' in the second '\\r'-separated
    part of cell (2, 0). Hearing is 1 where level >= threshold, else 0.

    Any parsing problem raises; the caller decides whether to skip the file.
    """
    df = read_liberman_table(path)

    frequency = float(re.findall(r'\d+\.\d+', df.iloc[0, 6])[0]) * 1000
    threshold = parse_liberman_threshold(df.iloc[0, 10])

    parts = df.iat[2, 0].split('\r')
    # The original code required the last part to be numeric; keep that check
    # so the same malformed files are still rejected.
    float(parts[-1])
    levels = [
        int(level)
        for level in parts[1].split(':')[-1].split(';')
        if level.isdigit()
    ]

    # Drop the header row, keep one column per level, then write the levels
    # into the row with label 2.
    # NOTE(review): that row is overwritten in full, so whatever samples it
    # held are not retained; this mirrors the original logic - confirm intended.
    df = df.iloc[1:, :len(levels)].copy()
    df.iloc[1] = levels
    # Drop the row above the levels and the final row, then transpose so each
    # stimulus level becomes one row (column label 2 now holds the level).
    df = df.iloc[1:-1].T

    df.insert(0, 'filename', extract_filename(path))
    df.insert(1, 'Hearing', df[2].apply(lambda level: 1 if level >= threshold else 0))
    df.insert(2, 'Freq(Hz)', frequency)

    # Positional string labels: '0'..'3' metadata, '4'.. waveform samples.
    df.columns = [str(i) for i in range(len(df.columns))]
    return resample_liberman_waves(df)


# All ABR TSVs under ABR_Data_Liberman, sorted so the row order is reproducible.
files = sorted(glob.glob('ABR_Data_Liberman/**/*ABR*.tsv', recursive=True))

liberman_frames = []
for filename in files:
    try:
        liberman_frames.append(load_liberman_file(filename))
    except Exception as e:
        # Best effort: report the file and carry on with the rest.
        print(f"An error occurred while processing file {filename}: {e}")

# Single concatenation; each file keeps its own 0..k-1 row labels.
all_data = pd.concat(liberman_frames) if liberman_frames else pd.DataFrame()

liberman = all_data
liberman

An error occurred while processing file ABR_Data_Liberman/WPZ134/ABR-134-L-45.2.tsv: could not convert string to float: ': 30'


Unnamed: 0,filename,Hearing,Freq(Hz),Level(dB),0,1,2,3,4,5,...,234,235,236,237,238,239,240,241,242,243
0,WPZ142,0,22600.0,15,-0.05561,-0.040673,-0.020629,-0.009095,-0.010052,0.003137,...,0.029115,0.038782,0.037434,0.028127,0.033294,0.035436,0.022338,0.003483,-0.018112,-0.010842
1,WPZ142,0,22600.0,20,-0.059651,-0.07876,-0.077258,-0.067385,-0.066943,-0.061194,...,0.007538,0.008046,0.020469,0.014811,0.007109,0.022042,0.040054,0.039098,0.019423,0.024187
2,WPZ142,0,22600.0,25,0.030502,0.042868,0.027796,0.018103,0.013964,0.010188,...,-0.059239,-0.065699,-0.053545,-0.015305,0.002125,0.029216,0.075391,0.073864,0.028866,0.032548
3,WPZ142,1,22600.0,30,0.031048,0.033101,0.03379,0.051136,0.043199,0.041612,...,0.11585,0.098632,0.117231,0.131006,0.108633,0.07588,0.056617,0.05193,0.024162,0.004457
4,WPZ142,1,22600.0,40,-0.11492,-0.081732,-0.025496,0.005907,0.010266,-0.001072,...,-0.028359,-0.003287,0.01223,0.023294,0.03083,0.048006,0.067395,0.018452,-0.032176,-0.061776
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6,WPZ106,1,11300.0,40,0.034004,0.014954,-0.012269,-0.020021,0.01084,0.015107,...,0.079243,0.071795,0.096091,0.101451,0.071972,0.090937,0.077631,0.094975,0.090937,0.082764
7,WPZ106,1,11300.0,50,-0.037149,-0.059339,-0.043227,-0.011675,-0.014165,-0.009537,...,0.041068,0.053679,0.065702,0.102768,0.111719,0.073299,0.064585,0.049231,0.049997,0.048516
8,WPZ106,1,11300.0,60,-0.05098,-0.046275,-0.028011,-0.025571,-0.015962,0.008768,...,-0.04236,-0.021671,0.008585,0.007897,0.006271,-0.023955,-0.048496,-0.069648,-0.016034,-0.023775
9,WPZ106,1,11300.0,70,-0.074153,-0.031929,-0.041565,-0.057704,-0.080204,-0.056439,...,0.0639,0.072551,0.051112,-0.02772,-0.023936,-0.0473,-0.022426,-0.068996,-0.02568,-0.068362


## 1.3 Load Marcotti Data

In [None]:
import pandas as pd
import os

# Read every CSV in Marcotti_ABR, tag each row with its source file name and
# stack the results into one frame with a fresh 0..n-1 index.
marcotti_dir = 'Marcotti_ABR'

# Frames are collected in a list and concatenated once; concatenating inside
# the loop re-copies all accumulated rows on every file.
marcotti_frames = []

# os.listdir returns entries in arbitrary order; sort for reproducible output.
for filename in sorted(os.listdir(marcotti_dir)):
    if filename.endswith('.csv'):
        df = pd.read_csv(os.path.join(marcotti_dir, filename))

        # First column records which file the rows came from
        # (the '.csv' suffix is stripped in a later cell).
        df.insert(0, 'filename', filename)

        marcotti_frames.append(df)

# An empty directory yields an empty frame rather than a concat error.
all_data = (
    pd.concat(marcotti_frames, ignore_index=True)
    if marcotti_frames
    else pd.DataFrame()
)

In [None]:
# Remove the literal '.csv' text from the file names. regex=False makes the
# '.' match only a dot; on pandas < 2.0 the default treated the pattern as a
# regular expression, where '.' matches any character.
all_data['filename'] = all_data['filename'].str.replace('.csv', '', regex=False)

In [None]:
# Load the threshold table and rename its 'Mouse ID' column to 'filename'
# so it shares a key name with all_data.
thresholds = (
    pd.read_csv('thresholds.csv')
    .rename(columns={'Mouse ID': 'filename'})
)

In [None]:
import pandas as pd

# Attach per-file, per-frequency thresholds to the Marcotti recordings.

# Wide -> long: one row per (filename, frequency) with its threshold.
thresholds_melt = thresholds.melt(
    id_vars='filename', var_name='Freq(Hz)', value_name='threshold'
)

# melt leaves the former column headers as strings; make them numeric so they
# compare equal to all_data['Freq(Hz)'].
thresholds_melt['Freq(Hz)'] = pd.to_numeric(thresholds_melt['Freq(Hz)'])

# Both sides of the join use string file names.
thresholds_melt['filename'] = thresholds_melt['filename'].astype(str)
all_data['filename'] = all_data['filename'].astype(str)

merge_keys = ['filename', 'Freq(Hz)']

# First left merge adds a 'threshold' column.
all_data = all_data.merge(thresholds_melt, on=merge_keys, how='left')

# NOTE(review): the same merge is applied a second time, which turns
# 'threshold' into 'threshold_x' / 'threshold_y'. The following cells rely on
# that layout (they drop the last column and read 'threshold_x'), so it is
# kept as is; re-running this cell adds further threshold columns.
all_data = all_data.merge(thresholds_melt, on=merge_keys, how='left')



In [None]:
# Discard the right-most column by position.
# NOTE(review): after the two threshold merges this is presumably
# 'threshold_y' - confirm. Not idempotent: every run removes one more column.
n_keep = all_data.shape[1] - 1
all_data = all_data.iloc[:, :n_keep]

In [None]:
# Move the last column to the second position (index 1), leaving the order of
# all other columns unchanged.
cols = list(all_data.columns)
last_col = cols.pop()
cols = cols[:1] + [last_col] + cols[1:]
all_data = all_data[cols]

In [None]:
# Hearing = 1 when the stimulus level is at or above the threshold, else 0
# (rows with a missing threshold compare False and get 0). Computed as one
# vectorised comparison, the same way the Manor cell builds its label,
# instead of a Python-level apply over every row of a very wide frame.
hearing = (all_data['Level(dB)'] >= all_data['threshold_x']).astype(int)

# Put the label where 'threshold_x' currently sits, then drop the threshold.
all_data.insert(all_data.columns.get_loc('threshold_x'), 'Hearing', hearing)
all_data = all_data.drop(columns=['threshold_x'])

In [None]:
# Keep the label-based column span from 'filename' through '976'
# (both ends inclusive); anything to the right of '976' is discarded.
column_span = slice('filename', '976')
all_data = all_data.loc[:, column_span]

In [None]:
# Display the labelled frame as this cell's output to check the column layout.
all_data

Unnamed: 0,filename,Hearing,Freq(Hz),Level(dB),0,1,2,3,4,5,...,967,968,969,970,971,972,973,974,975,976
0,97,0,100.0,0.0,0.033791,0.039729,0.047360,0.056843,0.065612,0.070652,...,0.348105,0.341615,0.333120,0.324244,0.316473,0.310317,0.305540,0.301918,0.299239,0.296636
1,97,0,100.0,5.0,0.265480,0.272649,0.276348,0.275191,0.269755,0.262055,...,-0.128397,-0.130201,-0.130415,-0.130186,-0.132303,-0.137713,-0.142897,-0.145481,-0.148816,-0.151563
2,97,0,100.0,10.0,0.037163,0.024569,0.013040,0.003256,-0.003927,-0.007555,...,0.137230,0.141710,0.143284,0.142663,0.141399,0.139028,0.133610,0.124458,0.111841,0.098145
3,97,0,100.0,15.0,-0.019905,-0.026373,-0.033627,-0.038605,-0.039325,-0.034740,...,0.070884,0.086998,0.098786,0.106550,0.110857,0.112451,0.111858,0.109425,0.104791,0.097083
4,97,0,100.0,20.0,0.160114,0.161109,0.161414,0.161318,0.160955,0.159810,...,-0.088411,-0.090558,-0.092034,-0.096604,-0.101225,-0.104872,-0.106351,-0.104073,-0.097163,-0.085631
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16221,71,1,36000.0,95.0,-0.118377,-0.119035,-0.118623,-0.118036,-0.117730,-0.117819,...,0.259145,0.262498,0.271489,0.284537,0.297751,0.308969,0.317394,0.322188,0.322519,0.319128
16222,71,1,30000.0,95.0,0.082913,0.088466,0.096360,0.105286,0.113020,0.116751,...,-0.267053,-0.263192,-0.261740,-0.265535,-0.271114,-0.278002,-0.285858,-0.295151,-0.306930,-0.320709
16223,71,1,24000.0,95.0,-0.094127,-0.090920,-0.084665,-0.076653,-0.066335,-0.055525,...,-0.820263,-0.816483,-0.812340,-0.807451,-0.800046,-0.789668,-0.777439,-0.765569,-0.755071,-0.746031
16224,71,1,18000.0,95.0,-0.232872,-0.223349,-0.213287,-0.202847,-0.190474,-0.177575,...,-0.718762,-0.724659,-0.726310,-0.723738,-0.718188,-0.710650,-0.701895,-0.691927,-0.680184,-0.665809


In [None]:
# Labels of the waveform columns that will be resampled: '0'..'975'.
# NOTE(review): all_data was sliced through column '976' earlier, so that
# column is not part of wave_cols and travels with the metadata - confirm.
wave_cols = list(map(str, range(976)))

# 244 evenly spaced positions from 0 to 976 at which each waveform is sampled.
# NOTE(review): the interpolation cell uses np.arange(976) (0..975) as sample
# positions, so the final point (976) lies past the last position - confirm.
new_points = np.linspace(0, 976, num=244)

# Empty frame that will hold the interpolated rows.
new_df = pd.DataFrame()

# Sanity check: number of target points.
print(len(new_points))


244


In [None]:
# Linearly resample every row's waveform (columns in wave_cols) onto
# new_points and keep the remaining columns in front of the samples.
#
# Rows are collected in a list and the frame is built once; concatenating a
# one-row frame per iteration re-copies all previous rows each time. Building
# new_df from scratch also makes the cell safe to re-run.
sample_positions = np.arange(976)  # x-coordinates of the original samples

resampled_rows = []
for index, row in all_data.iterrows():
    wave_data = row[wave_cols].astype(float)
    non_wave_data = row.drop(wave_cols)
    interpolated_values = np.interp(new_points, sample_positions, wave_data)
    resampled_rows.append(list(non_wave_data) + list(interpolated_values))

# Columns are positional integers (0, 1, 2, ...) and dtype is object, as with
# the row-by-row construction; later cells assign the final labels.
new_df = pd.DataFrame(resampled_rows, dtype=object)

In [None]:
# Display the interpolated frame as this cell's output.
new_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,239,240,241,242,243,244,245,246,247,248
0,97,0,100.0,0.0,0.296636,0.033791,0.065695,0.066535,0.051126,0.041897,...,0.118143,0.174712,0.236451,0.271457,0.32267,0.347076,0.348439,0.341828,0.310418,0.299239
1,97,0,100.0,5.0,-0.151563,0.26548,0.269628,0.245469,0.226786,0.204498,...,-0.018511,0.009673,-0.015604,-0.054524,-0.086791,-0.119809,-0.123298,-0.130141,-0.137624,-0.148816
2,97,0,100.0,10.0,0.098145,0.037163,-0.003987,-0.003464,0.007656,0.020094,...,0.026247,0.036444,0.039849,0.059662,0.103865,0.114026,0.114591,0.141563,0.139067,0.111841
3,97,0,100.0,15.0,0.097083,-0.019905,-0.039249,-0.000669,0.016501,0.005105,...,-0.200474,-0.196833,-0.203717,-0.233226,-0.21554,-0.125194,-0.004055,0.086468,0.112425,0.104791
4,97,0,100.0,20.0,-0.085631,0.160114,0.160936,0.146369,0.129469,0.118032,...,0.033685,0.09628,0.132701,0.094899,0.047131,-0.010179,-0.065745,-0.090487,-0.104812,-0.097163
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16221,71,1,36000.0,95.0,0.319128,-0.118377,-0.117731,-0.125463,-0.152373,-0.198541,...,0.523606,0.503557,0.483672,0.428102,0.363084,0.319916,0.272272,0.262388,0.308785,0.322519
16222,71,1,30000.0,95.0,-0.320709,0.082913,0.113082,0.118743,0.117828,0.100142,...,-0.013549,-0.040811,-0.071388,-0.116064,-0.158196,-0.221033,-0.265463,-0.263319,-0.277889,-0.30693
16223,71,1,24000.0,95.0,-0.746031,-0.094127,-0.066157,-0.037672,-0.032352,0.004445,...,-0.782004,-0.829512,-0.866106,-0.900933,-0.896161,-0.852806,-0.827938,-0.816608,-0.789839,-0.755071
16224,71,1,18000.0,95.0,-0.665809,-0.232872,-0.190261,-0.138403,-0.112954,-0.11619,...,-0.460814,-0.497342,-0.590164,-0.625124,-0.649152,-0.668249,-0.690185,-0.724465,-0.710774,-0.680184


In [None]:
# Name the four leading metadata columns; the remaining labels are unchanged.
meta_names = ["filename", "Hearing", "Freq(Hz)", "Level(dB)"]
new_df.columns = meta_names + new_df.columns[4:].tolist()

# Remove the single column that directly follows "Level(dB)" (position 4).
# NOTE(review): this looks like the leftover '976' sample that was not part
# of wave_cols - confirm. Not idempotent: each run drops one more column.
new_df = new_df.drop(columns=new_df.columns[4])



In [None]:
# Relabel the columns after the four metadata columns as integers 0, 1, 2, ...
n_wave = new_df.shape[1] - 4
new_df.columns = ['filename', 'Hearing', 'Freq(Hz)', 'Level(dB)', *range(n_wave)]

In [None]:
# Display the frame after the waveform columns were renumbered.
new_df

Unnamed: 0,filename,Hearing,Freq(Hz),Level(dB),0,1,2,3,4,5,...,234,235,236,237,238,239,240,241,242,243
0,97,0,100.0,0.0,0.033791,0.065695,0.066535,0.051126,0.041897,0.03876,...,0.118143,0.174712,0.236451,0.271457,0.32267,0.347076,0.348439,0.341828,0.310418,0.299239
1,97,0,100.0,5.0,0.26548,0.269628,0.245469,0.226786,0.204498,0.201019,...,-0.018511,0.009673,-0.015604,-0.054524,-0.086791,-0.119809,-0.123298,-0.130141,-0.137624,-0.148816
2,97,0,100.0,10.0,0.037163,-0.003987,-0.003464,0.007656,0.020094,0.008527,...,0.026247,0.036444,0.039849,0.059662,0.103865,0.114026,0.114591,0.141563,0.139067,0.111841
3,97,0,100.0,15.0,-0.019905,-0.039249,-0.000669,0.016501,0.005105,0.018298,...,-0.200474,-0.196833,-0.203717,-0.233226,-0.21554,-0.125194,-0.004055,0.086468,0.112425,0.104791
4,97,0,100.0,20.0,0.160114,0.160936,0.146369,0.129469,0.118032,0.102733,...,0.033685,0.09628,0.132701,0.094899,0.047131,-0.010179,-0.065745,-0.090487,-0.104812,-0.097163
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16221,71,1,36000.0,95.0,-0.118377,-0.117731,-0.125463,-0.152373,-0.198541,-0.197439,...,0.523606,0.503557,0.483672,0.428102,0.363084,0.319916,0.272272,0.262388,0.308785,0.322519
16222,71,1,30000.0,95.0,0.082913,0.113082,0.118743,0.117828,0.100142,0.069171,...,-0.013549,-0.040811,-0.071388,-0.116064,-0.158196,-0.221033,-0.265463,-0.263319,-0.277889,-0.30693
16223,71,1,24000.0,95.0,-0.094127,-0.066157,-0.037672,-0.032352,0.004445,0.059592,...,-0.782004,-0.829512,-0.866106,-0.900933,-0.896161,-0.852806,-0.827938,-0.816608,-0.789839,-0.755071
16224,71,1,18000.0,95.0,-0.232872,-0.190261,-0.138403,-0.112954,-0.11619,-0.104785,...,-0.460814,-0.497342,-0.590164,-0.625124,-0.649152,-0.668249,-0.690185,-0.724465,-0.710774,-0.680184


In [None]:
# Final labels: four metadata columns followed by the string labels
# '0'..'243' for the waveform samples. The assignment raises if new_df does
# not have exactly 248 columns.
non_wave_cols = ['filename', 'Hearing', 'Freq(Hz)', 'Level(dB)']
new_cols = [*non_wave_cols, *map(str, range(244))]
new_df.columns = new_cols

In [None]:
# Display the frame with its final column labels.
new_df

Unnamed: 0,filename,Hearing,Freq(Hz),Level(dB),0,1,2,3,4,5,...,234,235,236,237,238,239,240,241,242,243
0,97,0,100.0,0.0,0.033791,0.065695,0.066535,0.051126,0.041897,0.03876,...,0.118143,0.174712,0.236451,0.271457,0.32267,0.347076,0.348439,0.341828,0.310418,0.299239
1,97,0,100.0,5.0,0.26548,0.269628,0.245469,0.226786,0.204498,0.201019,...,-0.018511,0.009673,-0.015604,-0.054524,-0.086791,-0.119809,-0.123298,-0.130141,-0.137624,-0.148816
2,97,0,100.0,10.0,0.037163,-0.003987,-0.003464,0.007656,0.020094,0.008527,...,0.026247,0.036444,0.039849,0.059662,0.103865,0.114026,0.114591,0.141563,0.139067,0.111841
3,97,0,100.0,15.0,-0.019905,-0.039249,-0.000669,0.016501,0.005105,0.018298,...,-0.200474,-0.196833,-0.203717,-0.233226,-0.21554,-0.125194,-0.004055,0.086468,0.112425,0.104791
4,97,0,100.0,20.0,0.160114,0.160936,0.146369,0.129469,0.118032,0.102733,...,0.033685,0.09628,0.132701,0.094899,0.047131,-0.010179,-0.065745,-0.090487,-0.104812,-0.097163
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16221,71,1,36000.0,95.0,-0.118377,-0.117731,-0.125463,-0.152373,-0.198541,-0.197439,...,0.523606,0.503557,0.483672,0.428102,0.363084,0.319916,0.272272,0.262388,0.308785,0.322519
16222,71,1,30000.0,95.0,0.082913,0.113082,0.118743,0.117828,0.100142,0.069171,...,-0.013549,-0.040811,-0.071388,-0.116064,-0.158196,-0.221033,-0.265463,-0.263319,-0.277889,-0.30693
16223,71,1,24000.0,95.0,-0.094127,-0.066157,-0.037672,-0.032352,0.004445,0.059592,...,-0.782004,-0.829512,-0.866106,-0.900933,-0.896161,-0.852806,-0.827938,-0.816608,-0.789839,-0.755071
16224,71,1,18000.0,95.0,-0.232872,-0.190261,-0.138403,-0.112954,-0.11619,-0.104785,...,-0.460814,-0.497342,-0.590164,-0.625124,-0.649152,-0.668249,-0.690185,-0.724465,-0.710774,-0.680184


In [None]:
# Bind the resampled frame to the name used when the three labs are combined.
# This is an alias of new_df, not a copy.
marcotti = new_df

## 1.4 Combine Liberman, Manor, and Marcotti Data

In [None]:
# Stack the three labs' recordings. ignore_index=True gives the combined frame
# a fresh 0..n-1 index; without it the row labels of the sources repeat, which
# makes label-based selection on the result ambiguous.
df = pd.concat([manor, liberman, marcotti], ignore_index=True)

In [None]:
# The first four columns are metadata; every later label is normalised to the
# string form of its integer value.
meta_labels = list(df.columns[:4])
wave_labels = [str(int(label)) for label in df.columns[4:]]
df.columns = meta_labels + wave_labels

# Cast all waveform columns to float in one call.
df = df.astype({label: float for label in wave_labels})

In [None]:
# Write the combined dataset to disk without the row index.
output_path = 'liberman_manor_marcotti.csv'
df.to_csv(output_path, index=False)