In [None]:
import sys
from pathlib import Path
import pandas as pd

import datagnosis.logger as log
# Register stderr as a sink for the datagnosis logger with level "INFO".
# NOTE(review): presumably this surfaces INFO-and-above records in the cell output — confirm.
log.add(level="INFO", sink=sys.stderr)

In [None]:
# Locate the Brazil COVID-19 CSV.
# Search the current working directory and each of its ancestors for a directory
# named "datagnosis" that actually contains the data file. The working directory
# is checked explicitly because `Path.parents` never yields the path it is called
# on, so starting the notebook from the repository root would otherwise find nothing.
data_relative_path = Path("data/Brazil_covid19/Brazil_covid19.csv")
cwd = Path.cwd()
root = next(
    (
        candidate
        for candidate in (cwd, *cwd.parents)
        if candidate.name == "datagnosis" and (candidate / data_relative_path).is_file()
    ),
    None,
)
if root is None:
    # Fail with an actionable message instead of a TypeError from `None / Path`.
    raise FileNotFoundError(
        f"Could not find '{data_relative_path}' inside a 'datagnosis' directory "
        f"at or above '{cwd}'."
    )
data_path = root / data_relative_path
df = pd.read_csv(data_path)

# Set time horizon to set up the problem as a classification task
time_horizon = 14
target_col = f"is_dead_at_time_horizon={time_horizon}"

# Build the binary label:
#   1 -> is_dead == 1 and Days_hospital_to_outcome <= time_horizon
#   0 -> Days_hospital_to_outcome > time_horizon, or is_dead == 0
# The assignments run in this order, so the `is_dead == 0` rule has the final say.
df.loc[(df["Days_hospital_to_outcome"] <= time_horizon) & (df["is_dead"] == 1), target_col] = 1
df.loc[df["Days_hospital_to_outcome"] > time_horizon, target_col] = 0
df.loc[df["is_dead"] == 0, target_col] = 0
# Rows matched by none of the rules above are still NaN here; the integer cast
# raises on them rather than silently inventing a label.
df[target_col] = df[target_col].astype(int)

# Drop survival columns as they are not needed for a classification problem
df = df.drop(columns=["is_dead", "Days_hospital_to_outcome"])

# Split into label vector and feature matrix, then preview both.
y = df[target_col]
X = df.drop(columns=[target_col])
display(X.head())
display(y.head())



In [None]:

from datagnosis.plugins.core.datahandler import DataHandler
from datagnosis.plugins.core.models.simple_mlp import SimpleMLP
import torch
import torch.nn as nn


# Wrap the features and labels in a datagnosis DataHandler using batches of 32.
# NOTE(review): `datahander` is misspelled, but the name is kept because the
# later `hcm.fit(...)` cell reads it.
datahander = DataHandler(X, y, batch_size=32)

# Size the network from the data: one input per feature column and one output
# per distinct label value.
n_features = X.shape[1]
n_classes = y.nunique(dropna=False)
model = SimpleMLP(input_dim=n_features, output_dim=n_classes)

# Loss function and optimizer used to train the model.
learning_rate = 0.01
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# datagnosis absolute
from datagnosis.plugins import Plugins

# Ask the plugin loader for its list of plugins and print it.
plugin_loader = Plugins()
plugins = plugin_loader.list()
print(plugins)


In [None]:
# Instantiate the "vog" plugin with the model, loss and optimizer built above,
# then fit it on the data handler.
hcm = Plugins().get(
    "vog",
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    lr=learning_rate,
    epochs=10,
    # Derived from the labels rather than hard-coded, so it stays in step with
    # the model's output dimension if the target definition changes.
    num_classes=len(y.unique()),
    logging_interval=1,
)
# NOTE(review): `use_caches_if_exist=True` presumably reuses results cached by
# an earlier run instead of refitting — confirm, and clear the cache if the
# model or data changed.
hcm.fit(
    datahandler=datahander,
    use_caches_if_exist=True,
)

In [None]:
# Print the names of the scores exposed by the fitted plugin.
print(hcm.score_names)
# Plot the scores as a "dist" plot.
# NOTE(review): the meaning of `axis=1` is defined by the plugin — confirm against its docs.
hcm.plot_scores(plot_type="dist", axis=1)

In [None]:
import pandas as pd
# Report which plugin is in use and its hard direction.
print(hcm.name())
print(hcm.hard_direction())

# NOTE(review): despite the name, this selects six points (indices 0-5) by
# index, not the five hardest ones — confirm which was intended.
hardest_5 = hcm.extract_datapoints(method="index", indices=[0, 1, 2, 3, 4, 5])

# hardest_5[0] is indexed as (features, labels, indices); hardest_5[1] is used as the scores.
extracted_points = hardest_5[0]
# Swap the first two dimensions once so each feature column can be picked by position.
features_by_column = extracted_points[0].transpose(0, 1)

# Number of leading feature columns to include in the preview table.
n_preview_columns = 4
preview_columns = {
    "indices": extracted_points[2],
    **{
        f"{X.columns[position]}": features_by_column[position]
        for position in range(n_preview_columns)
    },
    "labels": extracted_points[1],
    "scores": hardest_5[1],
}
display(pd.DataFrame(data=preview_columns))