# MRI Data
- MRI-derived mean perfusion values for different brain regions

## Summary
### ADNI
* 4459 records, 138 labels
* Many columns are entirely NaN (missing in all 4459 records); these columns are dropped, leaving 36 features

### SHEFFIELD
* 104 records, 138 labels
* 6 rows contain all NAN values
* After removing these rows, 98 records remain

In [3]:
import numpy as np
import pandas as pd

# ADNI DATA

In [4]:
# Read the raw ADNI sMRI export, print its (rows, columns) shape and preview the frame.
adni_csv = '../datasets/adni_data/ADNI_sMRI.csv'
df = pd.read_csv(adni_csv)
print(df.shape)
df

(4459, 146)


Unnamed: 0,ID,TotalICVolume,GreyMatter,WhiteMatter,CSF,DeepGreyMatter,Brainstem,Background,NonVentricularCSF,3rdVentricle,...,LeftSuperiorTemporalGyrus,RightTemporalPole,LeftTemporalPole,RightTriangularInferiorFrontalGyrus,LeftTriangularInferiorFrontalGyrus,RightTransverseTemporalGyrus,LeftTransverseTemporalGyrus,Right Cortex,Left Cortex,Cerebellar Vermis
0,183bl,1214750.869,,,,,,,,2706.384202,...,,,,,,,,232505.2075,254897.6692,10233.253600
1,183m12,1214750.869,,,,,,,,2996.197727,...,,,,,,,,227700.1954,247076.2894,10115.602410
2,183m24,1214750.869,,,,,,,,3072.165927,...,,,,,,,,212094.4008,235671.8185,10231.821720
3,241bl,1214750.869,,,,,,,,2954.135320,...,,,,,,,,242753.8721,251966.4935,10816.146570
4,241m06,1214750.869,,,,,,,,3022.121600,...,,,,,,,,238270.3091,244937.4547,10110.303170
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4454,5109bl,1214750.869,,,,,,,,3897.497914,...,,,,,,,,239684.5495,244014.6299,8832.965541
4455,4959bl,1214750.869,,,,,,,,2406.687517,...,,,,,,,,248666.2235,245058.8064,9273.111774
4456,2376m12,1214750.869,,,,,,,,1385.568343,...,,,,,,,,270418.6637,265627.5990,9016.570279
4457,2376bl,1214750.869,,,,,,,,1379.796349,...,,,,,,,,257523.1260,251449.4541,8734.288601


In [5]:
"""
Check for NAN values
"""
# Per-column NaN counts, smallest first (ascending order is the sort_values default),
# so fully populated columns appear at the top and all-NaN columns at the bottom.
nan_counts = df.isna().sum()
nan_counts.sort_values()

ID                              0
Unnamed: 25                     0
RightHippocampus                0
LeftHippocampus                 0
RightInfLatVentricle            0
                             ... 
RightFusiformGyrus           4459
LeftFrontalPole              4459
RightFrontalPole             4459
LeftLingualGyrus             4459
LeftInferiorTemporalGyrus    4459
Length: 146, dtype: int64

In [6]:
# Keep only the columns that have at least one observed value; any column that is
# NaN in every row is removed (36 features remain afterwards).
df = df.dropna(how='all', axis='columns')
df

Unnamed: 0,ID,TotalICVolume,3rdVentricle,4thVentricle,RightAccumbensArea,LeftAccumbensArea,RightAmygdala,LeftAmygdala,Brainstem.1,RightCaudate,...,LeftPutamen,RightThalamusProper,LeftThalamusProper,RightVentralDC,LeftVentralDC,LeftBasalForebrain,RightBasalForebrain,Right Cortex,Left Cortex,Cerebellar Vermis
0,183bl,1214750.869,2706.384202,3609.184822,291.551435,252.005358,835.108228,912.182725,17367.08342,6505.632312,...,3592.236503,5822.857187,6127.523393,3853.724849,4152.741615,473.947627,490.895945,232505.2075,254897.6692,10233.253600
1,183m12,1214750.869,2996.197727,3635.575129,234.388234,238.099130,800.650949,819.406019,17141.13145,7376.158593,...,3260.574014,5330.552084,5739.352444,3980.588189,4250.580968,420.033343,238.099130,227700.1954,247076.2894,10115.602410
2,183m24,1214750.869,3072.165927,4002.862747,191.821471,172.981862,750.158965,817.860661,17073.72283,8844.239201,...,2756.325428,5240.030635,5520.106101,4061.094265,4344.998101,424.949035,334.680108,212094.4008,235671.8185,10231.821720
3,241bl,1214750.869,2954.135320,2840.931017,257.332709,230.605449,1033.417235,1134.362340,16405.23630,3094.177326,...,2993.232221,5994.747683,6322.101395,4008.205437,4395.198489,395.276294,372.635433,242753.8721,251966.4935,10816.146570
4,241m06,1214750.869,3022.121600,2896.213470,195.010691,142.122747,919.989054,1123.161465,16199.16377,3109.397590,...,2833.204993,5583.160444,5788.291668,4031.563103,4246.814859,552.385195,454.988672,238270.3091,244937.4547,10110.303170
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4454,5109bl,1214750.869,3897.497914,3170.369046,348.100232,331.390590,1136.878336,1099.618949,16413.33070,4192.978343,...,3153.659404,5548.223670,6613.074111,4180.108806,4264.902452,411.098694,520.282189,239684.5495,244014.6299,8832.965541
4455,4959bl,1214750.869,2406.687517,2577.421885,286.750805,294.593408,1126.900916,1076.690228,19722.07312,4100.419322,...,3885.063248,5714.913795,6176.816066,4131.609445,4335.787556,471.998496,492.010655,248666.2235,245058.8064,9273.111774
4456,2376m12,1214750.869,1385.568343,1375.924216,366.649059,381.545791,947.449411,1129.740642,17965.97610,3827.254748,...,3796.341875,5863.973909,6213.573529,4071.285615,4031.848023,717.281978,555.312302,270418.6637,265627.5990,9016.570279
4457,2376bl,1214750.869,1379.796349,1224.019747,372.605386,333.661236,871.396622,1136.693017,17992.79268,3607.775884,...,3324.708032,5789.583644,6022.143144,4111.924066,4146.701703,595.726457,415.716050,257523.1260,251449.4541,8734.288601


In [7]:
# Count the rows (not columns) that still contain at least one NaN value.
# any(axis=1) flags a row as True when any of its cells is null.
null_rows = df.isnull().any(axis=1)
# Series.sum() counts the True flags in one vectorized pass, instead of iterating
# the Series element by element with the builtin sum().
total_null = int(null_rows.sum())
print(f"Rows containing at least one NAN values: {total_null}")

Rows containing at least one NAN values: 0


In [17]:
# Collect every column name of `df` except the first one and write them to a text
# file, one name per line.
# NOTE(review): the first column is presumably the subject ID - verify before
# relying on this list as "brain regions only".
# NOTE(review): this cell's execution count (17) is out of sequence with the cells
# around it; re-run the notebook top to bottom to confirm `df` is still the ADNI
# frame when this file is written.
mri_features = list(df.columns[1:])
# The context manager closes the file even if a write fails part-way through.
with open('../preprocessed_data/ADNI_MRI_BRAIN_REGIONS.csv', 'w') as features_file:
    features_file.writelines(name + "\n" for name in mri_features)

In [9]:
# Persist the current frame as the preprocessed ADNI file.
# index=False leaves the DataFrame's row index out of the CSV.
df.to_csv(
    '../preprocessed_data/ADNI_MRI.csv',
    index=False,
    encoding='utf-8',
)

# SHEFFIELD DATA

In [11]:
# Read the raw Sheffield sMRI export (this rebinds `df`), print its (rows, columns)
# shape and preview the frame.
sheffield_csv = '../datasets/sheffield_data/SHEF_sMRI.csv'
df = pd.read_csv(sheffield_csv)
print(df.shape)
df

(104, 146)


Unnamed: 0,ID,TotalICVolume,GreyMatter,WhiteMatter,CSF,DeepGreyMatter,Brainstem,Background,NonVentricularCSF,3rdVentricle,...,LeftSuperiorTemporalGyrus,RightTemporalPole,LeftTemporalPole,RightTriangularInferiorFrontalGyrus,LeftTriangularInferiorFrontalGyrus,RightTransverseTemporalGyrus,LeftTransverseTemporalGyrus,Right Cortex,Left Cortex,Cerebellar Vermis
0,SH_DARE_G1_001,1446449.234,550017.2838,446809.7216,396726.8913,35125.56927,17769.76754,3101237.522,355486.7572,1575.003632,...,5839.466589,8468.281245,7139.371930,2707.916400,3142.975997,979.103820,943.947489,,,
1,SH_DARE_G1_002,1552869.594,616840.1363,463802.1254,414430.3754,37676.12788,20120.82944,3153923.535,263579.9652,2954.008113,...,6080.282097,6692.001718,7217.588404,3165.824821,3378.520437,1183.888405,1574.123336,,,
2,SH_DARE_G1_003,1570559.845,606426.8249,514852.4430,394576.4168,36268.16971,18435.99001,3382503.189,343170.8017,2097.955194,...,5545.914234,7483.908035,8149.241961,3450.595766,3309.091457,1104.788303,1824.614572,,,
3,SH_DARE_G1_004,1380368.265,547252.6855,412446.2734,369713.0451,33366.74030,17589.52096,3082004.345,329365.1642,1190.037045,...,4518.449371,5763.857415,6106.630272,3471.673802,3477.826135,1117.087950,1559.177044,,,
4,SH_DARE_G1_005,1508361.919,595605.8328,457674.8675,398980.7088,36543.11450,19557.39535,3207421.626,362429.6842,1607.517351,...,6151.456500,7230.751911,7370.497816,3595.600592,3046.284931,1509.958889,1212.010075,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,SH_DARE_G3_024,,,,,,,,,,...,,,,,,,,,,
100,SH_DARE_G3_025,,,,,,,,,,...,,,,,,,,,,
101,SH_DARE_G3_026,,,,,,,,,,...,,,,,,,,,,
102,SH_DARE_G3_027,,,,,,,,,,...,,,,,,,,,,
