In [49]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, StandardScaler


In [50]:
clinical_features = pd.read_pickle("C:/Users/lpnhu/Downloads/clinical_features.pkl")
radiomic_features = pd.read_pickle("C:/Users/lpnhu/Downloads/radiomic_features.pkl")

In [51]:
print("Clinical Features Shape:", clinical_features.shape)
print("Radiomic Features Shape:", radiomic_features.shape)

Clinical Features Shape: (201, 10)
Radiomic Features Shape: (201, 810)


In [52]:
clinical_features.head()

Unnamed: 0_level_0,Sex,Age at Diagnosis,Tumor Location,NF1,Extent of Tumor Resection,Chemotherapy,Radiation,Progression Free Survival,Censorship,Cohort
SubjectID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
C1003557,Male,2321,Cerebellar,None documented,Biopsy only,Yes,No,856,True,Discovery
C1026189,Female,1895,Suprasellar,None documented,Biopsy only,Yes,No,1228,False,Discovery
C1026558,Female,4989,Basal Ganglia,None documented,Biopsy only,Yes,No,1851,False,Replicate
C1031970,Male,226,Cerebellar,None documented,Partial resection,No,No,158,True,Replicate
C1032462,Male,1162,Thalamus,None documented,Gross/Near total resection,No,No,636,False,Discovery


In [53]:
# Check unique values in "Extent of Tumor Resection" column
unique_values = clinical_features['Extent of Tumor Resection'].unique()

print("Unique values in 'Extent of Tumor Resection':")
print(unique_values)

Unique values in 'Extent of Tumor Resection':
['Biopsy only' 'Partial resection' 'Gross/Near total resection'
 'Not Applicable']


In [54]:
# Check unique values in "NF1" column
unique_values = clinical_features['NF1'].unique()

print("Unique values in 'NF1':")
print(unique_values)

Unique values in 'NF1':
['None documented' 'Neurofibromatosis, Type 1 (NF-1)'
 'Tuberous Sclerosis (TSC1, TSC2)']


In [55]:
# Define a mapping for ordinal encoding
nf1_mapping = {
    'None documented': 0,
    'Neurofibromatosis, Type 1 (NF-1)': 1,
    'Tuberous Sclerosis (TSC1, TSC2)': 2
}

# Apply the mapping to the NF1 column
clinical_features['NF1'] = clinical_features['NF1'].map(nf1_mapping)

# Verify the changes
print(clinical_features['NF1'].unique())


[0 1 2]


In [56]:
# Encode the 'Extent of Tumor Resection' column
encoding_map = {
    'Biopsy only': 0,
    'Partial resection': 1,
    'Gross/Near total resection': 2,
    'Not Applicable': -1  # Assign -1 or handle as NaN
}

clinical_features['Extent of Tumor Resection'] = clinical_features['Extent of Tumor Resection'].map(encoding_map)

In [57]:
# Encode 'Sex' directly in the dataset
sex_mapping = {'Male': 1, 'Female': 0}
clinical_features['Sex'] = clinical_features['Sex'].map(sex_mapping)

# Verify the changes
print(clinical_features['Sex'].head())


SubjectID
C1003557    1
C1026189    0
C1026558    0
C1031970    1
C1032462    1
Name: Sex, dtype: int64


In [58]:
# One-hot encode 'Tumor Location'
tumor_location_encoded = pd.get_dummies(clinical_features['Tumor Location'], prefix='Tumor_Location').astype(int)

# Add the one-hot encoded columns to the dataset
clinical_features = pd.concat([clinical_features, tumor_location_encoded], axis=1)

# Drop the original column (optional)
clinical_features.drop('Tumor Location', axis=1, inplace=True)

# Verify the changes
clinical_features.head()


Unnamed: 0_level_0,Sex,Age at Diagnosis,NF1,Extent of Tumor Resection,Chemotherapy,Radiation,Progression Free Survival,Censorship,Cohort,Tumor_Location_Basal Ganglia,Tumor_Location_Brainstem,Tumor_Location_Cerebellar,Tumor_Location_Lobar,Tumor_Location_Multifocal,Tumor_Location_OPG,Tumor_Location_Suprasellar,Tumor_Location_Tectum,Tumor_Location_Thalamus,Tumor_Location_Ventricular
SubjectID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
C1003557,1,2321,0,0,Yes,No,856,True,Discovery,0,0,1,0,0,0,0,0,0,0
C1026189,0,1895,0,0,Yes,No,1228,False,Discovery,0,0,0,0,0,0,1,0,0,0
C1026558,0,4989,0,0,Yes,No,1851,False,Replicate,1,0,0,0,0,0,0,0,0,0
C1031970,1,226,0,1,No,No,158,True,Replicate,0,0,1,0,0,0,0,0,0,0
C1032462,1,1162,0,2,No,No,636,False,Discovery,0,0,0,0,0,0,0,0,1,0


In [59]:
# Convert 'Yes'/'No' in Chemotherapy and Radiation to 1/0
clinical_features['Chemotherapy'] = clinical_features['Chemotherapy'].map({'Yes': 1, 'No': 0})
clinical_features['Radiation'] = clinical_features['Radiation'].map({'Yes': 1, 'No': 0})

# (Optional) Convert 'True'/'False' in Censorship to 1/0
clinical_features['Censorship'] = clinical_features['Censorship'].astype(int)

# Verify the changes
clinical_features.head()


Unnamed: 0_level_0,Sex,Age at Diagnosis,NF1,Extent of Tumor Resection,Chemotherapy,Radiation,Progression Free Survival,Censorship,Cohort,Tumor_Location_Basal Ganglia,Tumor_Location_Brainstem,Tumor_Location_Cerebellar,Tumor_Location_Lobar,Tumor_Location_Multifocal,Tumor_Location_OPG,Tumor_Location_Suprasellar,Tumor_Location_Tectum,Tumor_Location_Thalamus,Tumor_Location_Ventricular
SubjectID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
C1003557,1,2321,0,0,1.0,0.0,856,1,Discovery,0,0,1,0,0,0,0,0,0,0
C1026189,0,1895,0,0,1.0,0.0,1228,0,Discovery,0,0,0,0,0,0,1,0,0,0
C1026558,0,4989,0,0,1.0,0.0,1851,0,Replicate,1,0,0,0,0,0,0,0,0,0
C1031970,1,226,0,1,0.0,0.0,158,1,Replicate,0,0,1,0,0,0,0,0,0,0
C1032462,1,1162,0,2,0.0,0.0,636,0,Discovery,0,0,0,0,0,0,0,0,1,0


In [60]:
# Merge clinical and radiomic features on SubjectID
merged = pd.merge(clinical_features, radiomic_features, on='SubjectID', how='inner')

print("Merged Data Shape:", merged.shape)
merged.head()


Merged Data Shape: (201, 829)


Unnamed: 0_level_0,Sex,Age at Diagnosis,NF1,Extent of Tumor Resection,Chemotherapy,Radiation,Progression Free Survival,Censorship,Cohort,Tumor_Location_Basal Ganglia,...,FLAIR_tumor_core_Collage_SumVarianceVariancePhi,FLAIR_tumor_core_Collage_SumVarianceVarianceTheta,DWI_tumor_core_Collage_AngularSecondMomentKurtosisPhi,DWI_tumor_core_Collage_AngularSecondMomentKurtosisTheta,DWI_tumor_core_Collage_AngularSecondMomentMedianPhi,DWI_tumor_core_Collage_AngularSecondMomentMedianTheta,DWI_tumor_core_Collage_AngularSecondMomentSkewnessPhi,DWI_tumor_core_Collage_AngularSecondMomentSkewnessTheta,DWI_tumor_core_Collage_AngularSecondMomentVariancePhi,DWI_tumor_core_Collage_AngularSecondMomentVarianceTheta
SubjectID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
C1003557,1,2321,0,0,1.0,0.0,856,1,Discovery,0,...,1.941625,12.176193,19.464655,14.563037,0.033372,0.034819,3.539243,2.832029,0.001043,0.000718
C1026189,0,1895,0,0,1.0,0.0,1228,0,Discovery,0,...,1.873572,10.73873,13.550386,17.165885,0.036555,0.039062,2.998257,3.259773,0.001241,0.001472
C1026558,0,4989,0,0,1.0,0.0,1851,0,Replicate,1,...,2.414853,15.554728,5.571096,10.804298,0.061439,0.059799,2.084859,2.918622,0.004272,0.007861
C1031970,1,226,0,1,0.0,0.0,158,1,Replicate,0,...,1.339963,12.073631,12.885997,11.631347,0.04755,0.046489,2.706248,2.740129,0.001594,0.002745
C1032462,1,1162,0,2,0.0,0.0,636,0,Discovery,0,...,1.896613,9.539467,22.579686,33.354137,0.035204,0.039255,3.80251,4.583136,0.002411,0.005533


In [61]:
train = merged[merged['Cohort'] == 'Discovery']
test = merged[merged['Cohort'] == 'Replicate']

print(f"Train Data Shape: {train.shape}")
print(f"Test Data Shape: {test.shape}")


Train Data Shape: (160, 829)
Test Data Shape: (41, 829)


In [62]:
# Extract clinical and radiomic column names
clinical_columns = ['Sex', 'Age at Diagnosis', 'Extent of Tumor Resection', 'Chemotherapy', 'Radiation', 'Censorship', 'NF1', 'Progression Free Survival'] + list(tumor_location_encoded.columns)
radiomic_columns = [col for col in merged.columns if col not in clinical_columns and col != 'Cohort']


In [63]:
# Split train data
train_clinical = train[clinical_columns]
train_radiomic = train[radiomic_columns]

# Split test data
test_clinical = test[clinical_columns]
test_radiomic = test[radiomic_columns]

print(f"Train Clinical Shape: {train_clinical.shape}")
print(f"Train Radiomic Shape: {train_radiomic.shape}")
print(f"Test Clinical Shape: {test_clinical.shape}")
print(f"Test Radiomic Shape: {test_radiomic.shape}")


Train Clinical Shape: (160, 18)
Train Radiomic Shape: (160, 810)
Test Clinical Shape: (41, 18)
Test Radiomic Shape: (41, 810)


In [64]:
# Save train clinical and radiomic features
train_clinical.to_csv("train_clinical.csv", index=True)
train_radiomic.to_csv("train_radiomic.csv", index=True)

# Save test clinical and radiomic features
test_clinical.to_csv("test_clinical.csv", index=True)
test_radiomic.to_csv("test_radiomic.csv", index=True)

print("Data saved successfully.")

Data saved successfully.


In [65]:
train_clinical.head()

Unnamed: 0_level_0,Sex,Age at Diagnosis,Extent of Tumor Resection,Chemotherapy,Radiation,Censorship,NF1,Progression Free Survival,Tumor_Location_Basal Ganglia,Tumor_Location_Brainstem,Tumor_Location_Cerebellar,Tumor_Location_Lobar,Tumor_Location_Multifocal,Tumor_Location_OPG,Tumor_Location_Suprasellar,Tumor_Location_Tectum,Tumor_Location_Thalamus,Tumor_Location_Ventricular
SubjectID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
C1003557,1,2321,0,1.0,0.0,1,0,856,0,0,1,0,0,0,0,0,0,0
C1026189,0,1895,0,1.0,0.0,0,0,1228,0,0,0,0,0,0,1,0,0,0
C1032462,1,1162,2,0.0,0.0,0,0,636,0,0,0,0,0,0,0,0,1,0
C1046730,1,2386,0,1.0,0.0,1,0,463,0,0,1,0,0,0,0,0,0,0
C1060998,1,6338,2,0.0,0.0,0,0,90,0,0,1,0,0,0,0,0,0,0


In [66]:
train_radiomic.head()

Unnamed: 0_level_0,T1_shape_Elongation,T1_shape_Flatness,T1_shape_LeastAxisLength,T1_shape_MajorAxisLength,T1_shape_Maximum2DDiameterColumn,T1_shape_Maximum2DDiameterRow,T1_shape_Maximum2DDiameterSlice,T1_shape_Maximum3DDiameter,T1_shape_MeshVolume,T1_shape_MinorAxisLength,...,FLAIR_tumor_core_Collage_SumVarianceVariancePhi,FLAIR_tumor_core_Collage_SumVarianceVarianceTheta,DWI_tumor_core_Collage_AngularSecondMomentKurtosisPhi,DWI_tumor_core_Collage_AngularSecondMomentKurtosisTheta,DWI_tumor_core_Collage_AngularSecondMomentMedianPhi,DWI_tumor_core_Collage_AngularSecondMomentMedianTheta,DWI_tumor_core_Collage_AngularSecondMomentSkewnessPhi,DWI_tumor_core_Collage_AngularSecondMomentSkewnessTheta,DWI_tumor_core_Collage_AngularSecondMomentVariancePhi,DWI_tumor_core_Collage_AngularSecondMomentVarianceTheta
SubjectID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
C1003557,0.895004,0.690769,22.308064,32.294559,34.82815,38.470768,38.470768,38.678159,13296.875,28.903759,...,1.941625,12.176193,19.464655,14.563037,0.033372,0.034819,3.539243,2.832029,0.001043,0.000718
C1026189,0.776855,0.593865,26.752859,45.048718,41.436699,55.226805,55.443665,55.524769,28252.0,34.996313,...,1.873572,10.73873,13.550386,17.165885,0.036555,0.039062,2.998257,3.259773,0.001241,0.001472
C1032462,0.709129,0.667974,51.174767,76.611955,76.837491,91.268834,70.837843,95.420124,148462.0,54.327782,...,1.896613,9.539467,22.579686,33.354137,0.035204,0.039255,3.80251,4.583136,0.002411,0.005533
C1046730,0.89089,0.712004,37.081309,52.080165,65.192024,57.271284,65.122961,65.398777,57388.458333,46.397694,...,1.700908,12.123925,12.604474,35.036862,0.040027,0.040606,2.826033,4.148227,0.001594,0.001745
C1060998,0.755602,0.585505,19.188719,32.772938,29.732137,97.416631,79.056942,100.851376,6736.458333,24.763299,...,2.221509,13.672168,14.633792,14.635358,0.04485,0.045139,3.199397,2.914802,0.003448,0.001797


In [67]:
# Check which columns are incorrectly assigned
print("Radiomic columns:")
print(radiomic_columns)

print("Clinical columns:")
print(clinical_columns)


Radiomic columns:
['T1_shape_Elongation', 'T1_shape_Flatness', 'T1_shape_LeastAxisLength', 'T1_shape_MajorAxisLength', 'T1_shape_Maximum2DDiameterColumn', 'T1_shape_Maximum2DDiameterRow', 'T1_shape_Maximum2DDiameterSlice', 'T1_shape_Maximum3DDiameter', 'T1_shape_MeshVolume', 'T1_shape_MinorAxisLength', 'T1_shape_Sphericity', 'T1_shape_SurfaceArea', 'T1_shape_SurfaceVolumeRatio', 'T1_shape_VoxelVolume', 'T1_firstorder_10Percentile', 'T1_firstorder_90Percentile', 'T1_firstorder_Energy', 'T1_firstorder_Entropy', 'T1_firstorder_InterquartileRange', 'T1_firstorder_Kurtosis', 'T1_firstorder_Maximum', 'T1_firstorder_MeanAbsoluteDeviation', 'T1_firstorder_Mean', 'T1_firstorder_Median', 'T1_firstorder_Minimum', 'T1_firstorder_Range', 'T1_firstorder_RobustMeanAbsoluteDeviation', 'T1_firstorder_RootMeanSquared', 'T1_firstorder_Skewness', 'T1_firstorder_TotalEnergy', 'T1_firstorder_Uniformity', 'T1_firstorder_Variance', 'T1_glcm_Autocorrelation', 'T1_glcm_ClusterProminence', 'T1_glcm_ClusterShade'

In [68]:
train_clinical.head()

Unnamed: 0_level_0,Sex,Age at Diagnosis,Extent of Tumor Resection,Chemotherapy,Radiation,Censorship,NF1,Progression Free Survival,Tumor_Location_Basal Ganglia,Tumor_Location_Brainstem,Tumor_Location_Cerebellar,Tumor_Location_Lobar,Tumor_Location_Multifocal,Tumor_Location_OPG,Tumor_Location_Suprasellar,Tumor_Location_Tectum,Tumor_Location_Thalamus,Tumor_Location_Ventricular
SubjectID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
C1003557,1,2321,0,1.0,0.0,1,0,856,0,0,1,0,0,0,0,0,0,0
C1026189,0,1895,0,1.0,0.0,0,0,1228,0,0,0,0,0,0,1,0,0,0
C1032462,1,1162,2,0.0,0.0,0,0,636,0,0,0,0,0,0,0,0,1,0
C1046730,1,2386,0,1.0,0.0,1,0,463,0,0,1,0,0,0,0,0,0,0
C1060998,1,6338,2,0.0,0.0,0,0,90,0,0,1,0,0,0,0,0,0,0


In [69]:
# Extract the column names from the DataFrame
radiomic_columns = train_radiomic.columns.tolist()

# Display the first 10 columns as a sample
print("Radiomic Columns:", radiomic_columns[:10])


Radiomic Columns: ['T1_shape_Elongation', 'T1_shape_Flatness', 'T1_shape_LeastAxisLength', 'T1_shape_MajorAxisLength', 'T1_shape_Maximum2DDiameterColumn', 'T1_shape_Maximum2DDiameterRow', 'T1_shape_Maximum2DDiameterSlice', 'T1_shape_Maximum3DDiameter', 'T1_shape_MeshVolume', 'T1_shape_MinorAxisLength']


In [70]:
radiomic_columns

['T1_shape_Elongation',
 'T1_shape_Flatness',
 'T1_shape_LeastAxisLength',
 'T1_shape_MajorAxisLength',
 'T1_shape_Maximum2DDiameterColumn',
 'T1_shape_Maximum2DDiameterRow',
 'T1_shape_Maximum2DDiameterSlice',
 'T1_shape_Maximum3DDiameter',
 'T1_shape_MeshVolume',
 'T1_shape_MinorAxisLength',
 'T1_shape_Sphericity',
 'T1_shape_SurfaceArea',
 'T1_shape_SurfaceVolumeRatio',
 'T1_shape_VoxelVolume',
 'T1_firstorder_10Percentile',
 'T1_firstorder_90Percentile',
 'T1_firstorder_Energy',
 'T1_firstorder_Entropy',
 'T1_firstorder_InterquartileRange',
 'T1_firstorder_Kurtosis',
 'T1_firstorder_Maximum',
 'T1_firstorder_MeanAbsoluteDeviation',
 'T1_firstorder_Mean',
 'T1_firstorder_Median',
 'T1_firstorder_Minimum',
 'T1_firstorder_Range',
 'T1_firstorder_RobustMeanAbsoluteDeviation',
 'T1_firstorder_RootMeanSquared',
 'T1_firstorder_Skewness',
 'T1_firstorder_TotalEnergy',
 'T1_firstorder_Uniformity',
 'T1_firstorder_Variance',
 'T1_glcm_Autocorrelation',
 'T1_glcm_ClusterProminence',
 'T1_gl

In [71]:
# Group the features by their prefixes
grouped_features = {}
for feature in radiomic_features:
    prefix = feature.split('_')[0]  # Extract the prefix (e.g., T1, T2, FLAIR, DWI)
    if prefix not in grouped_features:
        grouped_features[prefix] = []
    grouped_features[prefix].append(feature)

In [72]:
for prefix, features in grouped_features.items():
    # Create a DataFrame with a single column for the features
    df = pd.DataFrame(features, columns=["Feature"])
    # Save to a CSV file
    filename = f"path_{prefix}.csv"
    df.to_csv(filename, index=False)
    print(f"Saved: {filename}")

Saved: path_T1.csv
Saved: path_T2.csv
Saved: path_T1gd.csv
Saved: path_FLAIR.csv
Saved: path_DWI.csv


In [73]:
train_clinical.head()

Unnamed: 0_level_0,Sex,Age at Diagnosis,Extent of Tumor Resection,Chemotherapy,Radiation,Censorship,NF1,Progression Free Survival,Tumor_Location_Basal Ganglia,Tumor_Location_Brainstem,Tumor_Location_Cerebellar,Tumor_Location_Lobar,Tumor_Location_Multifocal,Tumor_Location_OPG,Tumor_Location_Suprasellar,Tumor_Location_Tectum,Tumor_Location_Thalamus,Tumor_Location_Ventricular
SubjectID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
C1003557,1,2321,0,1.0,0.0,1,0,856,0,0,1,0,0,0,0,0,0,0
C1026189,0,1895,0,1.0,0.0,0,0,1228,0,0,0,0,0,0,1,0,0,0
C1032462,1,1162,2,0.0,0.0,0,0,636,0,0,0,0,0,0,0,0,1,0
C1046730,1,2386,0,1.0,0.0,1,0,463,0,0,1,0,0,0,0,0,0,0
C1060998,1,6338,2,0.0,0.0,0,0,90,0,0,1,0,0,0,0,0,0,0


In [74]:
train = pd.read_csv("train_clinical.csv")

In [75]:
train.head()

Unnamed: 0,SubjectID,Sex,Age at Diagnosis,Extent of Tumor Resection,Chemotherapy,Radiation,Censorship,NF1,Progression Free Survival,Tumor_Location_Basal Ganglia,Tumor_Location_Brainstem,Tumor_Location_Cerebellar,Tumor_Location_Lobar,Tumor_Location_Multifocal,Tumor_Location_OPG,Tumor_Location_Suprasellar,Tumor_Location_Tectum,Tumor_Location_Thalamus,Tumor_Location_Ventricular
0,C1003557,1,2321,0,1.0,0.0,1,0,856,0,0,1,0,0,0,0,0,0,0
1,C1026189,0,1895,0,1.0,0.0,0,0,1228,0,0,0,0,0,0,1,0,0,0
2,C1032462,1,1162,2,0.0,0.0,0,0,636,0,0,0,0,0,0,0,0,1,0
3,C1046730,1,2386,0,1.0,0.0,1,0,463,0,0,1,0,0,0,0,0,0,0
4,C1060998,1,6338,2,0.0,0.0,0,0,90,0,0,1,0,0,0,0,0,0,0


In [76]:
input_n1 = train_radiomic.shape[1]
print(f"Number of Radiomic Features (input_n1): {input_n1}")


Number of Radiomic Features (input_n1): 810


In [77]:
input_n2 = train_clinical.shape[1]
print(f"Number of Clinical Features (input_n2): {input_n2}")


Number of Clinical Features (input_n2): 18
