<a href="https://colab.research.google.com/github/vitaldb/plans/blob/main/predict.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# PlanS Model for predicting hepatocellular carcinoma in hepatitis B patients

In [2]:
!pip install scikit-survival
!pip install scikit-learn==1.1.3
# Rerunning the runtime required because we change the scikit-learn
!wget https://github.com/vitaldb/plans/raw/main/model.pkl

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
--2023-01-30 01:32:59--  https://github.com/vitaldb/plans/raw/main/model.pkl
Resolving github.com (github.com)... 140.82.112.4
Connecting to github.com (github.com)|140.82.112.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/vitaldb/plans/main/model.pkl [following]
--2023-01-30 01:32:59--  https://raw.githubusercontent.com/vitaldb/plans/main/model.pkl
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4117320 (3.9M) [application/octet-stream]
Saving to: ‘model.pkl.1’


2023-01-30 01:32:59 (54.9 MB/s) -

In [5]:
from sksurv.ensemble import RandomSurvivalForest
import pandas as pd
import numpy as np
import sys
import pickle

# load model
model = pickle.load(open('model.pkl', 'rb'))

# sample values
male = 0
e_t = 1  # entecavir (ETV) -> 0, tenofovir (TDF) -> 1
lc = 1  # None for missing value
plt = 172  # 1000mm3
tbil = 1.1  # mg/dL
alt = 122  # U/L
dna = 74279943  # IU/mL
hbeag = 1  # 0 or 1

x = np.array([male, e_t, lc, plt, tbil, alt, dna, hbeag]).astype(float)
x = x[None, ...]

y_pred = model.predict(x).flatten()[0]
y_risk = (1 - np.exp(-model.predict_cumulative_hazard_function(x, return_array=True).flatten())) * 100

x[:, 1] = 0
y_pred_e = model.predict(x).flatten()[0]
y_risk_e = (1 - np.exp(-model.predict_cumulative_hazard_function(x, return_array=True).flatten())) * 100

x[:, 1] = 1
y_pred_t = model.predict(x).flatten()[0]
y_risk_t = (1 - np.exp(-model.predict_cumulative_hazard_function(x, return_array=True).flatten())) * 100

# risk score = sum of the predict cumulative hazard
print(f'risk score = {y_pred:.3f}')
print(f'risk score (entecavir) = {y_pred_e:.3f}')
print(f'risk score (tenofovir) = {y_pred_t:.3f}')
print()

# event probability = 1 - survival probability
df = pd.DataFrame({'time(month)':model.event_times_, 'event(%)':y_risk, 'entecavir event(%)':y_risk_t, 'tenofovir event(%)': y_risk_e})
print(df)

risk score = 9.070
risk score (entecavir) = 9.400
risk score (tenofovir) = 9.070

     time(month)   event(%)  entecavir event(%)  tenofovir event(%)
0           12.0   0.218543            0.218543            0.202561
1           13.0   0.516917            0.516917            0.493732
2           14.0   0.669045            0.669045            0.690609
3           15.0   0.870766            0.870766            0.929286
4           16.0   0.906254            0.906254            0.987972
..           ...        ...                 ...                 ...
102        131.0  17.344897           17.344897           18.701208
103        132.0  17.344897           17.344897           18.701208
104        133.0  17.685310           17.685310           19.215756
105        135.0  18.187345           18.187345           19.708456
106        148.0  18.187345           18.187345           20.064516

[107 rows x 4 columns]
