In [3]:
import pandas as pd
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

data = pd.read_csv("synthetic_medical_conditions_dataset.csv")

tfidf = TfidfVectorizer(max_features=1000)
X = tfidf.fit_transform(data['condition_description']).toarray()
y = data['requires_authorization'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.long)


class MedicalConditionsDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]


train_dataset = MedicalConditionsDataset(X_train, y_train)
test_dataset = MedicalConditionsDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


class PriorAuthorizationModel(nn.Module):
    def __init__(self, input_size, hidden_size, hidden_size_2, output_size):
        super(PriorAuthorizationModel, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, hidden_size_2)
        self.fc3 = nn.Linear(hidden_size_2, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return self.softmax(out)


input_size = X_train.shape[1]
hidden_size = 128
hidden_size_2 = 64
output_size = 2

model = PriorAuthorizationModel(input_size, hidden_size, hidden_size_2, output_size)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {running_loss / len(train_loader):.4f}")

# Evaluation on the test set
model.eval()
all_preds = []
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        all_preds.extend(predicted.numpy())

# Calculate accuracy and classification report
accuracy = accuracy_score(y_test, all_preds)
report = classification_report(y_test, all_preds, target_names=["No Authorization", "Authorization Required"])

print(f"Accuracy: {accuracy * 100:.2f}%")
print(report)


Epoch [1/100], Loss: 1.3200
Epoch [2/100], Loss: 0.6955
Epoch [3/100], Loss: 0.6941
Epoch [4/100], Loss: 0.6917
Epoch [5/100], Loss: 0.6922
Epoch [6/100], Loss: 0.6913
Epoch [7/100], Loss: 0.6932
Epoch [8/100], Loss: 0.6901
Epoch [9/100], Loss: 0.6894
Epoch [10/100], Loss: 0.6889
Epoch [11/100], Loss: 0.6883
Epoch [12/100], Loss: 0.6866
Epoch [13/100], Loss: 0.6848
Epoch [14/100], Loss: 0.6847
Epoch [15/100], Loss: 0.6850
Epoch [16/100], Loss: 0.6828
Epoch [17/100], Loss: 0.6820
Epoch [18/100], Loss: 0.6807
Epoch [19/100], Loss: 0.6797
Epoch [20/100], Loss: 0.6800
Epoch [21/100], Loss: 0.6784
Epoch [22/100], Loss: 0.6785
Epoch [23/100], Loss: 0.6774
Epoch [24/100], Loss: 0.6764
Epoch [25/100], Loss: 0.6759
Epoch [26/100], Loss: 0.6758
Epoch [27/100], Loss: 0.6745
Epoch [28/100], Loss: 0.6742
Epoch [29/100], Loss: 0.6737
Epoch [30/100], Loss: 0.6730
Epoch [31/100], Loss: 0.6725
Epoch [32/100], Loss: 0.6720
Epoch [33/100], Loss: 0.6725
Epoch [34/100], Loss: 0.6710
Epoch [35/100], Loss: 0