In [5]:
import pandas as pd 
import numpy as np

# Oasis 정형 데이터 분석

In [6]:
# Demographic table of the OASIS data, read directly from the project's GitHub repository.
OASIS_DEMOGRAPHIC_URL = 'https://raw.githubusercontent.com/yonghyeun/Dementia_Analysis/main/data/oasis/oasis_demographic.csv'
df = pd.read_csv(OASIS_DEMOGRAPHIC_URL)

# Preview the first five rows.
df.head()

Unnamed: 0,Subject ID,MRI ID,Group,Visit,MR Delay,M/F,Hand,Age,EDUC,SES,MMSE,CDR,eTIV,nWBV,ASF
0,OAS2_0001,OAS2_0001_MR1,Nondemented,1,0,M,R,87,14,2.0,27.0,0.0,1987,0.696,0.883
1,OAS2_0001,OAS2_0001_MR2,Nondemented,2,457,M,R,88,14,2.0,30.0,0.0,2004,0.681,0.876
2,OAS2_0002,OAS2_0002_MR1,Demented,1,0,M,R,75,12,,23.0,0.5,1678,0.736,1.046
3,OAS2_0002,OAS2_0002_MR2,Demented,2,560,M,R,76,12,,28.0,0.5,1738,0.713,1.01
4,OAS2_0002,OAS2_0002_MR3,Demented,3,1895,M,R,80,12,,22.0,0.5,1698,0.701,1.034


In [7]:
# Count how many rows carry each CDR value.
df.loc[:, 'CDR'].value_counts()

0.0    206
0.5    123
1.0     41
2.0      3
Name: CDR, dtype: int64

In [8]:
# CDR distribution restricted to the rows whose Group is 'Converted'.
is_converted = df['Group'].eq('Converted')

df.loc[is_converted, 'CDR'].value_counts()

0.5    19
0.0    18
Name: CDR, dtype: int64

In [9]:
# Remember the index labels of the 'Converted' rows; the Group column is overwritten
# in the following cells, so this is the only record of which rows they were.
converted_rows = df.loc[df['Group'].eq('Converted')]
converted_idx = converted_rows.index

converted_idx

Int64Index([ 33,  34,  35,  36,  37,  38,  57,  58,  59,  81,  82,  83, 114,
            115, 194, 195, 218, 219, 220, 245, 246, 261, 262, 263, 264, 265,
            271, 272, 273, 274, 295, 296, 297, 298, 346, 347, 348],
           dtype='int64')

In [10]:
# 'Converted' rows whose CDR is 0.0 are relabelled as 'Nondemented'.
converted_cdr_zero = df['Group'].eq('Converted') & df['CDR'].eq(0.0)

df.loc[converted_cdr_zero, 'Group'] = 'Nondemented'

In [11]:
# 'Converted' rows whose CDR is 0.5 are relabelled as 'Mild demented'.
converted_cdr_half = df['Group'].eq('Converted') & df['CDR'].eq(0.5)

df.loc[converted_cdr_half, 'Group'] = 'Mild demented'

In [12]:
# 'Demented' rows whose CDR is 0.5 are relabelled as 'Mild demented'.
demented_cdr_half = df['Group'].eq('Demented') & df['CDR'].eq(0.5)

df.loc[demented_cdr_half, 'Group'] = 'Mild demented'

In [13]:
# 'Demented' rows with a CDR of 1 or higher are relabelled as 'Moderate demented'.
demented_cdr_high = df['Group'].eq('Demented') & df['CDR'].ge(1)

df.loc[demented_cdr_high, 'Group'] = 'Moderate demented'

In [14]:
# Rows labelled 'Nondemented' despite a CDR above 0 are treated as outliers and removed.
nondemented_with_cdr = df['Group'].eq('Nondemented') & df['CDR'].gt(0)
outlier_labels = df.loc[nondemented_with_cdr].index

df = df.drop(index=outlier_labels)

In [11]:
# Labels for the MRI data: Subject ID, MRI ID and Group, written to MRI_LABEL.csv without the index.
label_columns = ['Subject ID', 'MRI ID', 'Group']
mri_label = df.loc[:, label_columns]
mri_label.to_csv(path_or_buf='MRI_LABEL.csv', index=False)

mri_label.head()

Unnamed: 0,Subject ID,MRI ID,Group
0,OAS2_0001,OAS2_0001_MR1,Nondemented
1,OAS2_0001,OAS2_0001_MR2,Nondemented
2,OAS2_0002,OAS2_0002_MR1,Mild demented
3,OAS2_0002,OAS2_0002_MR2,Mild demented
4,OAS2_0002,OAS2_0002_MR3,Mild demented


In [15]:
# `converted_idx` holds index *labels* that were captured before rows were dropped from `df`
# (the index was never reset), so the rows must be selected by label. A positional `.iloc`
# lookup returns the wrong rows once any earlier row has been removed.
# `isin` also tolerates labels that no longer exist in `df`.
converted_df = df.loc[df.index.isin(converted_idx)]

# For every subject, keep the earliest visit within each Group label that subject carries.
# A subject with a single Group contributes one frame; a subject whose Group changes
# between visits contributes one frame per Group.
converted_df_list = []

for subject_id, subject_rows in converted_df.groupby('Subject ID', sort=False):

    for group_label, group_rows in subject_rows.groupby('Group', sort=False):

        first_visit = group_rows['Visit'].min()

        # Boolean filtering keeps each entry a DataFrame, so the list can be passed to pd.concat.
        converted_df_list.append(group_rows[group_rows['Visit'] == first_visit])

In [16]:
# All rows that were not originally labelled 'Converted' (removed by index label).
non_converted = df.drop(index=converted_idx)

# Keep only the earliest visit of each subject. groupby replaces the manual loop over
# unique IDs, which rebound the builtin `id` and used `_` as a live loop variable.
# sort=False keeps subjects in their order of first appearance.
non_converted_list = []

for subject_id, subject_rows in non_converted.groupby('Subject ID', sort=False):

    first_visit = subject_rows['Visit'].min()

    # Boolean filtering keeps each entry a DataFrame, so the list can be passed to pd.concat.
    non_converted_list.append(subject_rows[subject_rows['Visit'] == first_visit])

In [17]:
# Collapse each list of per-subject frames into a single frame.
converted_df, non_converted_df = (
    pd.concat(frames) for frames in (converted_df_list, non_converted_list)
)

# Stack both parts and order the rows by subject.
df = pd.concat([converted_df, non_converted_df]).sort_values(by='Subject ID')

In [15]:
# Write df to OASIS.csv without the index column.
df.to_csv(path_or_buf='OASIS.csv', index=False)