### Childhood Autistic Spectrum Disorder Screening using Machine Learning

The early diagnosis of neurodevelopmental disorders can improve treatment and significantly decrease the associated
healthcare costs. In this project, we will use supervised learning to diagnose Autistic Spectrum Disorder
(ASD) based on behavioural features and individual characteristics. More specifically, we will build and deploy a neural network using the Keras API. \
This project uses a dataset provided by SmartBridge. The dataset is available at: [GDrive link](https://drive.google.com/file/d/1MbSfEStcGwMkqSz8Md3HUHUPilJ4RA3Z/view?usp=sharing)


In [None]:
import sys
import pandas as pd
import sklearn
import keras
import numpy as np

In [None]:
# Colab-only step: mount Google Drive at /content/drive so the dataset can be
# read from it by file path in the cells below
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# location of the ASD screening CSV on the mounted Google Drive
file = '/content/drive/MyDrive/Colab Notebooks/Autism_Data.csv'

# load the comma-separated file into a DataFrame; index_col=None means no CSV
# column is used as the row index
data = pd.read_csv(
    filepath_or_buffer=file,
    sep=',',
    index_col=None,
)

In [None]:
# print the first record (row label 0) to see every column with a sample value
print(data.loc[0])

A1_Score                         1
A2_Score                         1
A3_Score                         1
A4_Score                         1
A5_Score                         0
A6_Score                         0
A7_Score                         1
A8_Score                         1
A9_Score                         0
A10_Score                        0
age                             26
gender                           f
ethnicity           White-European
jundice                         no
austim                          no
contry_of_res      'United States'
used_app_before                 no
result                           6
age_desc             '18 and more'
relation                      Self
Class/ASD                       NO
Name: 0, dtype: object


In [None]:
# display the first rows of the DataFrame to look at several patients at once
data.head()

Unnamed: 0,A1_Score,A2_Score,A3_Score,A4_Score,A5_Score,A6_Score,A7_Score,A8_Score,A9_Score,A10_Score,...,gender,ethnicity,jundice,austim,contry_of_res,used_app_before,result,age_desc,relation,Class/ASD
0,1,1,1,1,0,0,1,1,0,0,...,f,White-European,no,no,'United States',no,6,'18 and more',Self,NO
1,1,1,0,1,0,0,0,1,0,1,...,m,Latino,no,yes,Brazil,no,5,'18 and more',Self,NO
2,1,1,0,1,1,0,1,1,1,1,...,m,Latino,yes,yes,Spain,no,8,'18 and more',Parent,YES
3,1,1,0,1,0,0,1,1,0,1,...,f,White-European,no,yes,'United States',no,6,'18 and more',Self,NO
4,1,0,0,0,0,0,0,1,0,0,...,f,?,no,no,Egypt,no,2,'18 and more',?,NO


In [None]:
# summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
# NOTE(review): 'age' does not appear in this summary, which suggests it was not
# parsed as a numeric column - confirm and clean it if so
data.describe()

Unnamed: 0,A1_Score,A2_Score,A3_Score,A4_Score,A5_Score,A6_Score,A7_Score,A8_Score,A9_Score,A10_Score,result
count,704.0,704.0,704.0,704.0,704.0,704.0,704.0,704.0,704.0,704.0,704.0
mean,0.721591,0.453125,0.457386,0.495739,0.49858,0.284091,0.417614,0.649148,0.323864,0.573864,4.875
std,0.448535,0.498152,0.498535,0.500337,0.500353,0.451301,0.493516,0.477576,0.468281,0.494866,2.501493
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0
50%,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,4.0
75%,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,10.0


In [None]:
# remove the columns that should not be used as model inputs
# NOTE(review): in the rows previewed above, 'result' equals the sum of the
# A1..A10 scores and 'age_desc' is the same for every row - presumably that is
# why they are dropped; confirm
data = data.drop(columns=['result', 'age_desc'])

In [None]:
# check that 'result' and 'age_desc' are no longer among the columns
data.head()

Unnamed: 0,A1_Score,A2_Score,A3_Score,A4_Score,A5_Score,A6_Score,A7_Score,A8_Score,A9_Score,A10_Score,age,gender,ethnicity,jundice,austim,contry_of_res,used_app_before,relation,Class/ASD
0,1,1,1,1,0,0,1,1,0,0,26,f,White-European,no,no,'United States',no,Self,NO
1,1,1,0,1,0,0,0,1,0,1,24,m,Latino,no,yes,Brazil,no,Self,NO
2,1,1,0,1,1,0,1,1,1,1,27,m,Latino,yes,yes,Spain,no,Parent,YES
3,1,1,0,1,0,0,1,1,0,1,35,f,White-European,no,yes,'United States',no,Self,NO
4,1,0,0,0,0,0,0,1,0,0,40,f,?,no,no,Egypt,no,?,NO


In [None]:
# separate the label column from the feature columns
# the label is first renamed (in place) from 'Class/ASD' to the simpler 'Class'
data.rename(columns={'Class/ASD': 'Class'}, inplace=True)

# y holds the label, x holds everything else
y = data['Class']
x = data.drop('Class', axis=1)

In [None]:
# preview the feature frame; the 'Class' label column has been removed from it
x.head()

Unnamed: 0,A1_Score,A2_Score,A3_Score,A4_Score,A5_Score,A6_Score,A7_Score,A8_Score,A9_Score,A10_Score,age,gender,ethnicity,jundice,austim,contry_of_res,used_app_before,relation
0,1,1,1,1,0,0,1,1,0,0,26,f,White-European,no,no,'United States',no,Self
1,1,1,0,1,0,0,0,1,0,1,24,m,Latino,no,yes,Brazil,no,Self
2,1,1,0,1,1,0,1,1,1,1,27,m,Latino,yes,yes,Spain,no,Parent
3,1,1,0,1,0,0,1,1,0,1,35,f,White-European,no,yes,'United States',no,Self
4,1,0,0,0,0,0,0,1,0,0,40,f,?,no,no,Egypt,no,?


In [None]:
# convert the data to categorical values - one-hot-encoded vectors
# (each non-numeric column is expanded into one indicator column per distinct value)
# NOTE(review): the resulting labels include 'age_383' and 'age_?', so 'age' is
# being encoded as a categorical string column and contains an implausible value
# and a missing-value marker. Cleaning 'age' into a numeric column would be
# preferable, but it changes the number of feature columns the network is built
# for - confirm the downstream cells before changing this.
X = pd.get_dummies(x)

In [None]:
# list the column labels produced by the one-hot encoding
X.columns.values

array(['A1_Score', 'A2_Score', 'A3_Score', 'A4_Score', 'A5_Score',
       'A6_Score', 'A7_Score', 'A8_Score', 'A9_Score', 'A10_Score',
       'age_17', 'age_18', 'age_19', 'age_20', 'age_21', 'age_22',
       'age_23', 'age_24', 'age_25', 'age_26', 'age_27', 'age_28',
       'age_29', 'age_30', 'age_31', 'age_32', 'age_33', 'age_34',
       'age_35', 'age_36', 'age_37', 'age_38', 'age_383', 'age_39',
       'age_40', 'age_41', 'age_42', 'age_43', 'age_44', 'age_45',
       'age_46', 'age_47', 'age_48', 'age_49', 'age_50', 'age_51',
       'age_52', 'age_53', 'age_54', 'age_55', 'age_56', 'age_58',
       'age_59', 'age_60', 'age_61', 'age_64', 'age_?', 'gender_f',
       'gender_m', "ethnicity_'Middle Eastern '",
       "ethnicity_'South Asian'", 'ethnicity_?', 'ethnicity_Asian',
       'ethnicity_Black', 'ethnicity_Hispanic', 'ethnicity_Latino',
       'ethnicity_Others', 'ethnicity_Pasifika', 'ethnicity_Turkish',
       'ethnicity_White-European', 'ethnicity_others', 'jundice_no',
  

In [None]:
# show the encoded feature vector of one example patient (row label 1)
X.loc[1]

A1_Score             1
A2_Score             1
A3_Score             0
A4_Score             1
A5_Score             0
                    ..
relation_?           0
relation_Others      0
relation_Parent      0
relation_Relative    0
relation_Self        1
Name: 1, Length: 150, dtype: int64

In [None]:
# convert the class data to categorical values - one-hot-encoded vectors
# (one indicator column per distinct class label)
Y = pd.get_dummies(y)

In [None]:
# preview the one-hot-encoded class labels
Y.head()

Unnamed: 0,NO,YES
0,1,0
1,1,0
2,0,1
3,1,0
4,1,0


In [None]:
from sklearn import model_selection

# split the X and Y data into training (80%) and testing (20%) datasets
# - random_state pins the shuffle so the same split (and therefore comparable
#   metrics) is produced on every Restart & Run All
# - stratify=Y keeps the class proportions the same in both partitions
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
    X, Y, test_size=0.2, random_state=42, stratify=Y
)

### Building the Network - Keras


In [None]:
# build a neural network using Keras
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam


# define a function to build the keras model
def create_model(input_dim=150, learning_rate=0.001):
    """Build and compile a small feed-forward classifier.

    Architecture: Dense(8, relu) -> Dense(4, relu) -> Dense(2, softmax).

    Args:
        input_dim: number of input features (width of the encoded feature matrix).
            Defaults to 150, the value that was previously hard-coded.
        learning_rate: learning rate passed to the Adam optimizer.

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy loss and
        reporting accuracy.
    """
    # create model
    model = Sequential()
    model.add(Dense(8, input_dim=input_dim, kernel_initializer='normal', activation='relu'))
    model.add(Dense(4, kernel_initializer='normal', activation='relu'))
    # softmax (not sigmoid): the two outputs are mutually exclusive classes, and
    # categorical_crossentropy expects each output row to be a probability
    # distribution that sums to 1
    model.add(Dense(2, activation='softmax'))

    # compile model
    adam = Adam(learning_rate=learning_rate)
    model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])
    return model


# size the input layer from the actual feature matrix instead of a magic number,
# so the model keeps working if the encoding produces a different column count
model = create_model(input_dim=X_train.shape[1])

# summary() prints the table itself and returns None, so it is not wrapped in print()
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 8)                 1208      
                                                                 
 dense_1 (Dense)             (None, 4)                 36        
                                                                 
 dense_2 (Dense)             (None, 2)                 10        
                                                                 
Total params: 1,254
Trainable params: 1,254
Non-trainable params: 0
_________________________________________________________________
None


In [None]:
# report the container type of the training features and of the training labels
for split in (X_train, Y_train):
    print(type(split))

<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>


### Training the Network

Now it's time for the fun! Training a Keras model is as simple as calling `model.fit()`.

In [None]:
# convert the pandas training frames to float32 NumPy arrays before training
train_features = np.asarray(X_train).astype('float32')
train_targets = np.asarray(Y_train).astype('float32')

# train for 50 epochs in mini-batches of 10 with per-epoch progress output;
# the History object returned by fit() is left as the cell's displayed value
model.fit(
    train_features,
    train_targets,
    epochs=50,
    batch_size=10,
    verbose=1,
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7f02bc7dab60>

In [None]:
# generate classification report using predictions for categorical model
from sklearn.metrics import classification_report, accuracy_score

# per-class scores produced by the network, one row per test example
predictions = model.predict(np.asarray(X_test).astype('float32'))

# collapse the one-hot targets and the per-class scores to class indices;
# the index order follows the column order of Y_test
class_names = [str(label) for label in Y_test.columns]
y_true = np.asarray(Y_test).argmax(axis=1)
y_pred = predictions.argmax(axis=1)

print('Accuracy:', accuracy_score(y_true, y_pred))
# labels is passed explicitly so target_names always lines up with the class
# indices, even if a class happens to be missing from this test split
print(classification_report(
    y_true,
    y_pred,
    labels=list(range(len(class_names))),
    target_names=class_names,
))

# show only the first few rows rather than dumping the whole prediction array
predictions[:5]



array([[9.47918952e-01, 1.80906683e-01],
       [9.99999285e-01, 9.35827120e-05],
       [9.98945296e-01, 1.38229951e-02],
       [9.99887586e-01, 3.00618331e-03],
       [9.85756934e-01, 7.65620023e-02],
       [9.97079909e-01, 2.81033888e-02],
       [9.99001682e-01, 1.35775004e-02],
       [9.99995470e-01, 3.42278916e-04],
       [1.00000000e+00, 5.47291347e-06],
       [2.89689720e-01, 7.39793956e-01],
       [9.99995828e-01, 3.29482951e-04],
       [2.09628344e-01, 7.90371656e-01],
       [9.99983668e-01, 8.39739514e-04],
       [9.99850035e-01, 3.61424265e-03],
       [9.99991536e-01, 5.36419044e-04],
       [1.00000000e+00, 5.56204714e-06],
       [9.99999642e-01, 5.21544935e-05],
       [9.99999166e-01, 1.06798820e-04],
       [4.19060528e-01, 6.55584157e-01],
       [9.99994755e-01, 3.72094742e-04],
       [9.78060961e-01, 1.00358069e-01],
       [1.00000000e+00, 8.21211142e-06],
       [9.99996781e-01, 2.71350902e-04],
       [2.09628344e-01, 7.90371656e-01],
       [8.490403

In [None]:
# evaluate on the held-out test set; the printed list holds the loss followed by
# the compiled metric(s) (here: accuracy)
print('Results for Categorical Model')
test_features = np.asarray(X_test).astype('float32')
test_targets = np.asarray(Y_test).astype('float32')
test_metrics = model.evaluate(test_features, test_targets)
print(test_metrics)

Results for Categorical Model


[0.06559177488088608, 0.978723406791687]

In [None]:
# save the model by taking user input
save = input('Do you want to save the model? (y/n): ')

# normalise the answer so that 'Y', ' y ' or 'yes' are accepted as well
if save.strip().lower() in ('y', 'yes'):
    # NOTE(review): newer Keras releases may expect a '.keras' file extension for
    # this save format - confirm against the installed version and against
    # whatever code loads 'ASD_model' before renaming the file
    model.save('ASD_model', overwrite=True, save_format='keras')
    print('Model saved successfully!')
else:
    # no sys.exit() here: raising SystemExit inside a notebook kernel only
    # produces an exception message and interrupts "Run All"; there is nothing
    # to terminate, so simply report the decision
    print('Model was not saved!')