In [2]:
import pandas as pd 
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report, confusion_matrix
import mlflow
import mlflow.sklearn

In [11]:
from datetime import datetime

# Scratch preview of the progress-log line format (wall-clock time | epoch i/N).
epoch = 1
num_epochs = 10
timestamp = datetime.now().strftime("%H:%M:%S")
print(f'{timestamp}|Starting epoch {epoch+1}/{num_epochs}...')


00:58:50|Starting epoch 2/10...


In [3]:
# Read the Vital-1 parquet file into memory and display the frame.
vital1_path = 'E:/CNR_2025/data/processed/PulseDB/union_PulseDB_Vital-1.parquet'
df = pd.read_parquet(vital1_path)
df

Unnamed: 0,PPG_F,ABP_F,Age,Gender,Height,Weight,BMI,T,SegSBP,SegDBP,Label
0,0.016168,0.833281,48.0,77,177.4,65.00,20.7,540.008,117.771182,77.801254,Normal BP
1,0.013872,0.819892,48.0,77,177.4,65.00,20.7,540.016,115.179517,70.234718,Normal BP
2,0.011973,0.805373,48.0,77,177.4,65.00,20.7,540.024,116.158356,71.168408,Normal BP
3,0.010471,0.789024,48.0,77,177.4,65.00,20.7,540.032,116.990362,70.618718,Normal BP
4,0.009380,0.770268,48.0,77,177.4,65.00,20.7,540.040,116.774254,70.640372,Normal BP
...,...,...,...,...,...,...,...,...,...,...,...
15519995,0.536085,0.196341,61.0,77,174.5,80.95,26.6,1079.968,91.794847,61.343359,Normal BP
15519996,0.589613,0.193312,61.0,77,174.5,80.95,26.6,1079.976,91.794847,61.343359,Normal BP
15519997,0.642340,0.190619,61.0,77,174.5,80.95,26.6,1079.984,91.794847,61.343359,Normal BP
15519998,0.692709,0.187881,61.0,77,174.5,80.95,26.6,1079.992,91.794847,61.343359,Normal BP


In [6]:

# Per-class row counts of the target column.
label_counts = df['Label'].value_counts()
print(label_counts)

Label
Normal BP               12542245
Elevated BP              2111521
Hypertension Stage 1      865994
Hypertension Stage 2         220
Hypertensive Crisis           20
Name: count, dtype: int64


In [7]:
# Model inputs (signal samples plus demographics) and the target column.
features = [
    "PPG_F",
    "ABP_F",
    "Age",
    "Gender",
    "Height",
    "Weight",
    "BMI",
    "T",
]
X, y = df[features], df["Label"]


In [None]:
# Fit the encoder on the label strings, then map them to integer class ids.
le = LabelEncoder().fit(y)
y_encoded = le.transform(y)

In [10]:
# Stratified 80/20 split so each class keeps its proportion in both partitions.
split_kwargs = dict(test_size=0.2, random_state=42, stratify=y_encoded)
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, **split_kwargs)

In [11]:
# Balanced class weights for the classifier.
# Computed from the training split only, so held-out labels never influence
# anything the model is fitted with (the original used the full label vector).
classes = np.unique(y_train)
weights = compute_class_weight(class_weight='balanced', classes=classes, y=y_train)
# Mapping {encoded class id -> weight}, the format RandomForestClassifier expects.
class_weights = dict(zip(classes, weights))


In [14]:
mlflow.set_experiment("bp_classification")

# Single source of truth for the hyperparameters, so what is logged to MLflow
# is always exactly what the model was built with.
rf_params = {"n_estimators": 100, "random_state": 42}

with mlflow.start_run():
    # Train a class-weighted random forest on the training split.
    clf = RandomForestClassifier(class_weight=class_weights, **rf_params)
    clf.fit(X_train, y_train)

    # Evaluate on the held-out split.
    y_pred = clf.predict(X_test)

    report = classification_report(y_test, y_pred, output_dict=True)
    cm = confusion_matrix(y_test, y_pred)

    # Log model
    mlflow.sklearn.log_model(clf, "model")

    # Log parameters and metrics
    mlflow.log_params(rf_params)

    # Flatten the report into {"<label>_<metric>": value}. Scalar entries
    # (the overall "accuracy") are kept under their own key; the original
    # dict-only filter silently dropped them.
    run_metrics = {}
    for label, scores in report.items():
        if isinstance(scores, dict):
            for metric, score in scores.items():
                run_metrics[f"{label}_{metric}"] = float(score)
        else:
            run_metrics[label] = float(scores)
    # One batched call instead of one tracking request per metric.
    mlflow.log_metrics(run_metrics)

    # Save confusion matrix
    np.save("confusion_matrix.npy", cm)
    mlflow.log_artifact("confusion_matrix.npy")



In [17]:
# Load a second recording file to evaluate the trained classifier on unseen data.
df8 = pd.read_parquet('E:/CNR_2025/data/processed/PulseDB/union_PulseDB_Vital-2.parquet')

features = ["PPG_F", "ABP_F", "Age", "Gender", "Height", "Weight", "BMI", "T"]
X8 = df8[features]
y8 = df8["Label"]
# Reuse the encoder fitted on the training labels instead of fitting a new one.
# A fresh fit assigns ids from whatever labels this file happens to contain,
# which can disagree with the ids the classifier was trained on. transform()
# raises ValueError if this file holds a label the training data never had.
y8 = le.transform(y8)

In [18]:
# Per-class report of the trained forest on the Vital-2 data.
# (classification_report is already imported in the first cell.)
y_pred = clf.predict(X8)
print(
    classification_report(
        y8,
        y_pred,
        # Pin the label ids so target_names always lines up with the encoder,
        # even when a class is absent from both y8 and the predictions.
        labels=np.arange(len(le.classes_)),
        target_names=le.classes_,
        # Report 0 for classes that are never predicted, without the
        # undefined-metric warning on every call.
        zero_division=0,
    )
)


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                      precision    recall  f1-score   support

         Elevated BP       0.00      0.00      0.00   3297437
Hypertension Stage 1       0.72      0.00      0.00   1038217
Hypertension Stage 2       0.00      0.00      0.00   3948297
 Hypertensive Crisis       0.00      0.00      0.00        52
           Normal BP       0.51      0.93      0.66   9424747

            accuracy                           0.49  17708750
           macro avg       0.25      0.19      0.13  17708750
        weighted avg       0.32      0.49      0.35  17708750



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# Launch the MLflow tracking UI through a shell escape.
# NOTE(review): the recorded output shows the shell could not find an `mlflow`
# executable, so this cell did not start the UI in the environment it was run in.
# Presumably the environment's scripts directory is not on PATH — TODO confirm.
!mlflow ui

'mlflow' is not recognized as an internal or external command,
operable program or batch file.


: 

In [4]:
import torch
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import DataLoader, TensorDataset
from torch import nn

# Define the model architecture (must match the one used for training)
class Model(nn.Module):
    """Fully connected classifier: input_dim -> 1024 -> 512 -> 64 -> num_class.

    Each hidden linear layer is followed by the shared ``activation`` and a
    dropout layer (p = 0.5, 0.5, 0.25). The last layer returns raw logits.
    Attribute names are part of the checkpoint key names and must not change.
    """

    def __init__(self, input_dim, activation, num_class):
        super().__init__()

        # Hidden stage 1
        self.layer1 = nn.Linear(input_dim, 1024)
        self.activation = activation
        self.dropout1 = nn.Dropout(0.5)

        # Hidden stage 2
        self.layer2 = nn.Linear(1024, 512)
        self.dropout2 = nn.Dropout(0.5)

        # Hidden stage 3
        self.layer3 = nn.Linear(512, 64)
        self.dropout3 = nn.Dropout(0.25)

        # Output head (logits, one per class)
        self.layer4 = nn.Linear(64, num_class)

    def forward(self, x):
        """Run ``x`` through the three hidden stages and return the logits."""
        hidden_stages = (
            (self.layer1, self.dropout1),
            (self.layer2, self.dropout2),
            (self.layer3, self.dropout3),
        )
        for linear, dropout in hidden_stages:
            x = dropout(self.activation(linear(x)))
        return self.layer4(x)
    # Define activation function
def get_activation_function(activation_name):
    """Return a new activation module for ``activation_name``.

    Supported names are 'relu', 'sigmoid' and 'tanh'.

    Raises:
        ValueError: if ``activation_name`` is none of the supported names.
    """
    supported = (
        ('relu', nn.ReLU),
        ('sigmoid', nn.Sigmoid),
        ('tanh', nn.Tanh),
    )
    for name, factory in supported:
        if activation_name == name:
            return factory()
    raise ValueError(f"Activation function '{activation_name}' is not supported.")



# Define your features (use the same ones as in training)
features = ["PPG_F", "ABP_F", "Age", "Gender", "Height", "Weight","T"]

# Load the recording file to evaluate the checkpoint on.
df = pd.read_parquet("E:/CNR_2025/data/processed/PulseDB/union_PulseDB_Vital-11.parquet")

# Drop rows with missing feature or label values. The explicit .copy() makes
# df_clean an independent frame, so the column assignments below cannot
# trigger SettingWithCopyWarning or write through to a slice of df.
df_clean = df.dropna(subset=features + ['Label']).copy()

# Standardize the features.
# NOTE(review): the scaler is fitted on THIS file, not restored from training.
# If the checkpoint was trained with statistics from other data, the inputs
# here are scaled differently from what the model saw — TODO confirm and load
# the training-time scaler instead.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
df_clean[features] = scaler.fit_transform(df_clean[features])

# Encode labels as integer class ids.
# NOTE(review): the encoder is also fitted on this file; the ids only match the
# checkpoint if this file contains the same set of labels as the training data
# — TODO confirm.
le = LabelEncoder()
df_clean['label'] = le.fit_transform(df_clean['Label'])

# Split into X (features) and y (labels)
X = df_clean[features].values.astype(np.float32)
y = df_clean['label'].values

# Pick the device once and reuse it for loading and placing the model.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Rebuild the architecture; the layer sizes must match the checkpoint.
model = Model(
    input_dim=X.shape[1],
    activation=get_activation_function('relu'),
    num_class=len(np.unique(y)),
)

# Load the state dict into the model.
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
# weights_only=True restricts unpickling to tensors/containers, which avoids
# executing arbitrary pickled code and silences the torch.load warning.
state_dict = torch.load(
    r'E:\CNR_2025\models\model-v4\best_model_epoch_10.pth',
    map_location=device,
    weights_only=True,
)
model.load_state_dict(state_dict)
model.to(device)
model.eval()  # Set model to evaluation mode (disables dropout)

# Convert to tensors
X_tensor = torch.tensor(X)
y_tensor = torch.tensor(y)

# Create a DataLoader. Inference needs no gradients, so a large batch is cheap
# and avoids hundreds of thousands of tiny forward passes over the full file.
dataset = TensorDataset(X_tensor, y_tensor)
dataloader = DataLoader(dataset, batch_size=8192, shuffle=False)

# Run on whichever device the model already lives on.
device = next(model.parameters()).device

# Collect one array per batch and concatenate once at the end.
true_batches = []
pred_batches = []

with torch.no_grad():  # No need to calculate gradients during inference
    for inputs, labels in dataloader:
        outputs = model(inputs.to(device))  # Logits, one column per class
        predicted = outputs.argmax(dim=1)   # Index of the highest logit

        # Labels are only compared on the CPU, so they never need to move.
        true_batches.append(labels.numpy())
        pred_batches.append(predicted.cpu().numpy())

y_true = np.concatenate(true_batches)
y_pred = np.concatenate(pred_batches)

# Calculate metrics (support-weighted averages across classes).
# zero_division=0: a class that is never predicted contributes precision 0.
# The previous value of 1 credited such classes with perfect precision, which
# inflated the weighted precision.
accuracy = accuracy_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred, average='weighted', zero_division=0)
precision = precision_score(y_true, y_pred, average='weighted', zero_division=0)
recall = recall_score(y_true, y_pred, average='weighted', zero_division=0)

# Print the results
print(f"Accuracy: {accuracy:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")


  state_dict = torch.load(r'E:\CNR_2025\models\model-v4\best_model_epoch_10.pth')


Accuracy: 0.4079
F1 Score: 0.3462
Precision: 0.4678
Recall: 0.4079


In [None]:
Accuracy: 0.6439
F1 Score: 0.6286
Precision: 0.6822
Recall: 0.6439

In [None]:
Accuracy: 0.7157
F1 Score: 0.6433
Precision: 0.8483
Recall: 0.7157

In [None]:
Accuracy: 0.7297
F1 Score: 0.6382
Precision: 0.6029
Recall: 0.7297

In [None]:
Accuracy: 0.6518
F1 Score: 0.5999
Precision: 0.6629
Recall: 0.6518