## Uploading Modules

In [16]:
import pandas as pd
import numpy as np
from sklearn import datasets, linear_model, svm
from sklearn.model_selection import train_test_split, cross_val_score
from matplotlib import pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegressionCV
from sklearn.model_selection import GridSearchCV
from sklearn import metrics
from sklearn.metrics import accuracy_score, f1_score,confusion_matrix,recall_score,precision_score,log_loss,roc_auc_score,roc_curve

## GSE63063 Analysis with Lasso

In [9]:
ad_y = pd.read_csv('/content/GSE63063_AD_demo.csv')
ctl_y = pd.read_csv('/content/GSE63063_CTL_demo.csv')
mci_y = pd.read_csv('/content/GSE63063_MCI_demo.csv')
genes = pd.read_csv('/content/GSE63063_genes.csv')
ad_x = pd.read_csv('/content/GSE63063_AD.csv')
ctl_x = pd.read_csv('/content/GSE63063_CTL.csv')
mci_x = pd.read_csv('/content/GSE63063_MCI.csv')

In [17]:
x_ad_ctl = pd.concat([ad_x,ctl_x],axis=1)
y_ad_ctl = pd.concat([ad_y,ctl_y],axis=0)

x_mci_ctl = pd.concat([mci_x,ctl_x],axis=1)
y_mci_ctl = pd.concat([mci_y,ctl_y],axis=0)

x_ad_mci = pd.concat([ad_x,mci_x],axis=1)
y_ad_mci = pd.concat([ad_y,mci_y],axis=0)


In [33]:
y_ad_ctl.index = y_ad_ctl['Subj']
y_mci_ctl.index = y_mci_ctl['Subj']
y_ad_mci.index = y_ad_mci['Subj']

x_ad_ctl = x_ad_ctl.transpose()
x_mci_ctl = x_mci_ctl.transpose()
x_ad_mci = x_ad_mci.transpose()

In [25]:
y_ad_ctl_p = y_ad_ctl['Status']
for i in range(0,len(y_ad_ctl_p)):
  if y_ad_ctl_p[i] == 'AD':
    y_ad_ctl_p[i] = 1
  elif y_ad_ctl_p[i] == 'CTL':
    y_ad_ctl_p[i] = 0

y_mci_ctl_p = y_mci_ctl['Status']
for i in range(0,len(y_mci_ctl_p)):
  if y_mci_ctl_p[i] == 'MCI':
    y_mci_ctl_p[i] = 1
  elif y_mci_ctl_p[i] == 'CTL':
    y_mci_ctl_p[i] = 0

y_ad_mci_p = y_ad_mci['Status']
for i in range(0,len(y_ad_mci_p)):
  if y_ad_mci_p[i] == 'AD':
    y_ad_mci_p[i] = 1
  elif y_ad_mci_p[i] == 'MCI':
    y_ad_mci_p[i] = 0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [93]:

## AD to CNTL
x_train,X_test,y_train,y_test = train_test_split(x_ad_ctl,y_ad_ctl_p,random_state=42,test_size=0.2)
y_train=y_train.astype('int')

#logistic regression
log_reg = linear_model.LogisticRegression(
    penalty='l1',
    C=0.9,
    solver='liblinear')
log_reg.fit(x_train,y_train)
y_pred = log_reg.predict(X_test)
print(accuracy_score(list(y_test),y_pred))
print(f1_score(list(y_test),y_pred))
print(roc_auc_score(list(y_test),y_pred))
ad_cntl_coef = log_reg.coef_[0]

0.7272727272727273
0.7457627118644068
0.7261904761904762


In [94]:
## MCI to CNTL

x_train,X_test,y_train,y_test = train_test_split(x_mci_ctl,y_mci_ctl_p,random_state=42,test_size=0.2)
y_train=y_train.astype('int')

#logistic regression
log_reg = linear_model.LogisticRegression(
    penalty='l1',
    C=0.8,
    solver='liblinear')
log_reg.fit(x_train,y_train)
y_pred = log_reg.predict(X_test)
print(accuracy_score(list(y_test),y_pred))
print(f1_score(list(y_test),y_pred))
print(roc_auc_score(list(y_test),y_pred))
mci_cntl_coef = log_reg.coef_[0]

0.7346938775510204
0.6666666666666667
0.7213804713804713


In [95]:
## MCI to AD

x_train,X_test,y_train,y_test = train_test_split(x_ad_mci,y_ad_mci_p,random_state=42,test_size=0.2)
y_train=y_train.astype('int')

#logistic regression
log_reg = linear_model.LogisticRegression(
    penalty='l1',
    C=0.957,
    solver='liblinear')
log_reg.fit(x_train,y_train)
y_pred = log_reg.predict(X_test)
print(accuracy_score(list(y_test),y_pred))
print(f1_score(list(y_test),y_pred))
print(roc_auc_score(list(y_test),y_pred))
ad_mci_coef = log_reg.coef_[0]

0.62
0.7076923076923076
0.5998389694041868


In [110]:
res_df = pd.DataFrame(list(zip(genes['x'],list(ad_cntl_coef),list(mci_cntl_coef),list(ad_mci_coef))),columns = ['Genes','AD_Control_Weights','MCI_Control_Weights','AD_MCI_Weights'])

In [115]:
res_df.to_csv('GSE63063_Weights_Results.csv')

## miRNA Analysis with Lasso

In [92]:
miRNA_AD = pd.read_csv('/content/miRNA_AD.csv')
miRNA_CTL = pd.read_csv('/content/miRNA_MCI.csv')
miRNAs = pd.read_csv('/content/miRNAs.csv')
miRNA_AD_demo = pd.read_csv('/content/miRNA_AD_demo.csv')
miRNA_CTL_demo = pd.read_csv('/content/miRNA_CTL_demo.csv')

In [94]:
miRNA_AD_demo.index = miRNA_AD_demo['Subj']
miRNA_CTL_demo.index = miRNA_CTL_demo['Subj']
X = pd.concat([miRNA_AD,miRNA_CTL],axis = 1)
y = pd.concat([miRNA_AD_demo,miRNA_CTL_demo],axis = 0)
X = X.transpose()
y = y['Status']
for i in range(0,len(y)):
  if y[i] == 'MCI-C':
    y[i] = 1
  elif y[i] == 'MCI-NC':
    y[i] = 0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()


In [95]:
# Checking which subjects have missing data or error 

temp = X.applymap(np.isreal)
cols_to_ignore = []
row_to_ignore = []
for k in range(0,temp.shape[1]):
  if np.sum(temp.iloc[:,k]) != 157:
    cols_to_ignore.append(k)
for k in range(0,temp.shape[0]):
  if np.sum(temp.iloc[k,:]) != 2577:
    row_to_ignore.append(k)
cols_to_ignore = X.isna().any()
columns_input = []
for b in cols_to_ignore:
  columns_input.append(not b)

X =  X.iloc[5:X.shape[0],columns_input]
y = y[5:]

In [125]:

x_train,X_test,y_train,y_test = train_test_split(X,y,random_state=42,test_size=0.2)
y_train=y_train.astype('int')

#logistic regression
log_reg = linear_model.LogisticRegression(
    penalty='l1',
    C=0.125,
    solver='liblinear')
log_reg.fit(x_train,y_train)
y_pred = log_reg.predict(X_test)
print(accuracy_score(list(y_test),y_pred))
print(f1_score(list(y_test),y_pred))
print(roc_auc_score(list(y_test),y_pred))
miRNA_coefs = log_reg.coef_[0]

0.6153846153846154
0.39999999999999997
0.5692934782608696


In [158]:
miRNAs = pd.read_csv('/content/miRNAs.csv')
res_mi = pd.DataFrame(list(zip(miRNAs.iloc[0:2562,0],list(miRNA_coefs))),columns = ['miRNA','Weight'])

In [160]:
res_mi.to_csv('miRNA_Weights.csv')