# Example script
## Applying trained ML models to new batches.

In [1]:
import numpy as np
import pandas as pd
import sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.pipeline import Pipeline

# Load the dataset from a pickle file, using a path relative to the current working directory.
# Later cells read the columns `batch_name`, `feature`, `label_24` and `CHIR_conc` from this frame.
# NOTE(review): unpickling can execute arbitrary code -- only load this file from a trusted source.
df = pd.read_pickle("./data/dataset.pkl")

Train the logistic regression model on batches CD01-2, CD01-3, and CD01-4, under CHIR duration = 24h.

In [2]:
# Training rows: every sample whose batch is one of the three training batches.
train_batches = ["CD01-2", "CD01-3", "CD01-4"]
train_data = df[df.batch_name.isin(train_batches)]

# Elastic-net logistic regression (scikit-learn supports this penalty only with
# the "saga" solver); the fixed random_state keeps the fit repeatable.
lr = LogisticRegression(
    penalty="elasticnet",
    l1_ratio=0.1,
    solver="saga",
    class_weight="balanced",
    C=20,
    max_iter=20000,
    random_state=12345,
)

# Keep the 4 features with the highest f_classif scores.
feature_selector = SelectKBest(score_func=f_classif, k=4)

# Pipeline order: standardize -> select features -> classify.
pipeline_steps = [
    ('scaler', StandardScaler()),
    ("feature selection", feature_selector),
    ('classifier', lr),
]
clf = Pipeline(steps=pipeline_steps)

# Fit the whole pipeline on the per-sample feature vectors and the `label_24` labels.
X_train = train_data.feature.to_list()
y_train = train_data.label_24.to_list()
clf.fit(X_train, y_train)

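Apply the trained model to batch CD01-1. Each concentration is evaluated with a Deviation Score, computed as the fraction of samples predicted "high" minus the fraction predicted "low" (the code below uses a threshold of 0.15 to decide what counts as close to zero):
* Deviation Score $< 0 \Rightarrow$ low dosage condition;
* Deviation Score $\approx 0 \Rightarrow$ optimal dosage condition;
* Deviation Score $> 0 \Rightarrow$ high dosage condition.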

In [3]:
# Rows of batch CD01-1, which the training cell did not use.
test_data = df[df.batch_name == "CD01-1"]
# Scores within [-thres, thres] are reported as "optimal".
thres = 0.15

# Visit the distinct CHIR concentrations in ascending order.
for conc in np.sort(test_data.CHIR_conc.unique()):
    conc_rows = test_data[test_data.CHIR_conc == conc]
    pred_label = clf.predict(conc_rows.feature.to_list())

    # Deviation score: fraction predicted "high" minus fraction predicted "low".
    high_fraction = (pred_label == "high").mean()
    low_fraction = (pred_label == "low").mean()
    score = high_fraction - low_fraction

    # Choose the report line from where the score falls relative to the band.
    if score < -thres:
        template = "%d uM : score = %.2f --> low"
    elif score > thres:
        template = "%d uM : score = %.2f --> high"
    else:
        template = "%d uM : score = %.2f --> optimal"
    print(template % (conc, score))

4 uM : score = -0.58 --> low
6 uM : score = -0.17 --> low
8 uM : score = -0.12 --> optimal
10 uM : score = 0.17 --> high
