# Neural Network 

Fingerprint: Coulomb

In [37]:
from Coulomb import *
from sklearn.model_selection import train_test_split
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import KFold  


In [38]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=251)


In [39]:
# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Normalize the target (hform)
target_scaler = MinMaxScaler()  # You can use StandardScaler if needed
y_train = target_scaler.fit_transform(y_train.reshape(-1, 1) if isinstance(y_train, np.ndarray) else y_train.to_numpy().reshape(-1, 1))
y_test = target_scaler.transform(y_test.reshape(-1, 1) if isinstance(y_test, np.ndarray) else y_test.to_numpy().reshape(-1, 1))

# Convert to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)



In [40]:
# Define the neural network
class RegressionNN(nn.Module):
    def __init__(self, input_dim):
        super(RegressionNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 256)  # Increased neurons
        self.bn1 = nn.BatchNorm1d(256)  # Batch normalization
        self.fc2 = nn.Linear(256, 128)
        self.bn2 = nn.BatchNorm1d(128)
        self.fc3 = nn.Linear(128, 64)
        self.bn3 = nn.BatchNorm1d(64)
        self.fc4 = nn.Linear(64, 1)
        self.dropout = nn.Dropout(p=0.2)  # Dropout to reduce overfitting

    def forward(self, x):
        x = F.leaky_relu(self.bn1(self.fc1(x)))  # LeakyReLU activation
        x = self.dropout(x)
        x = F.leaky_relu(self.bn2(self.fc2(x)))
        x = self.dropout(x)
        x = F.leaky_relu(self.bn3(self.fc3(x)))
        x = self.dropout(x)
        x = self.fc4(x)
        return x



In [49]:

# Define cross-validation training loop with train_test_split
def cross_val_train(model_class, X_train, y_train, epochs, k_folds):
    kfold = KFold(n_splits=k_folds, shuffle=True, random_state=42)
    fold_results = []
    best_val_loss = float('inf')
    best_model_state = None

    for fold, (train_idx, val_idx) in enumerate(kfold.split(X_train)):
        print(f"\nFold {fold + 1}/{k_folds}")

        # Use train_test_split to split the fold's training data
        X_fold_train, X_val, y_fold_train, y_val = train_test_split(
            X_train[train_idx], y_train[train_idx], test_size=0.2, random_state=42
        )

        # Initialize model, optimizer, scheduler
        model = model_class(X_fold_train.shape[1])
        optimizer = optim.Adam(model.parameters(), lr=0.001)
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)
        criterion = nn.MSELoss()

        for epoch in range(epochs):
            # Training phase
            model.train()
            optimizer.zero_grad()
            outputs = model(X_fold_train)
            loss = criterion(outputs, y_fold_train)
            loss.backward()
            optimizer.step()

            # Step the learning rate scheduler
            scheduler.step()

            # Evaluation phase
            model.eval()
            with torch.no_grad():
                val_outputs = model(X_val)
                val_loss = criterion(val_outputs, y_val)

            # Convert MSE to RMSE for better interpretability
            rmse = torch.sqrt(loss).item()
            val_rmse = torch.sqrt(val_loss).item()

            # Print RMSE every 50 epochs
            if (epoch + 1) % 500 == 0:
                print(f"Epoch [{epoch + 1}/{epochs}], RMSE: {rmse:.4f}, Val RMSE: {val_rmse:.4f}")

        # Store final validation loss for the fold
        fold_results.append(val_loss.item())

        # Save the model state if it's the best so far
        if val_loss.item() < best_val_loss:
            best_val_loss = val_loss.item()
            best_model_state = model.state_dict()

    # Print overall results
    print("\nCross-Validation Results:")
    print(f"Fold Losses: {fold_results}")
    print(f"Mean Validation Loss: {np.mean(fold_results):.4f}")
    print(f"Standard Deviation: {np.std(fold_results):.4f}")

    # Save the best model state
    torch.save(best_model_state, "best_model.pth")
    print("Best model saved as 'best_model.pth'.")

In [None]:
# Initialize loss function
criterion = nn.MSELoss()

# Perform cross-validation
cross_val_train(RegressionNN, X_train, y_train, epochs=1000, k_folds=5)

# Save the model
# Example usage: torch.save(model.state_dict(), "enhanced_regression_model.pth")








Fold 1/5
Epoch [50/100], RMSE: 0.2560, Val RMSE: 0.1582
Epoch [100/100], RMSE: 0.2350, Val RMSE: 0.1349

Fold 2/5
Epoch [50/100], RMSE: 0.2003, Val RMSE: 0.1239
Epoch [100/100], RMSE: 0.1879, Val RMSE: 0.1138

Fold 3/5
Epoch [50/100], RMSE: 0.1909, Val RMSE: 0.1438
Epoch [100/100], RMSE: 0.1882, Val RMSE: 0.1234

Fold 4/5
Epoch [50/100], RMSE: 0.2020, Val RMSE: 0.1263
Epoch [100/100], RMSE: 0.1750, Val RMSE: 0.1201

Fold 5/5
Epoch [50/100], RMSE: 0.1972, Val RMSE: 0.1388
Epoch [100/100], RMSE: 0.1837, Val RMSE: 0.1177

Cross-Validation Results:
Fold Losses: [0.01820087619125843, 0.012941728346049786, 0.015232588164508343, 0.014429143629968166, 0.013853750191628933]
Mean Validation Loss: 0.0149
Standard Deviation: 0.0018
Best model saved as 'best_model.pth'.


In [53]:
# Load the best model and train it on the full training set
def train_on_full_data(model_class, X_train, y_train, X_test, y_test, criterion, epochs=100):
    model = model_class(X_train.shape[1])
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)

    # Load the best model state
    model.load_state_dict(torch.load("best_model.pth"))

    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = criterion(outputs, y_train)
        loss.backward()
        optimizer.step()

        scheduler.step()

        if (epoch + 1) % 100 == 0:
            rmse = torch.sqrt(loss).item()
            print(f"Epoch [{epoch + 1}/{epochs}], RMSE: {rmse:.4f}")

    print("Training on full dataset completed.")

    # Evaluate on test set
    model.eval()
    with torch.no_grad():
        test_outputs = model(X_test)
        test_loss = criterion(test_outputs, y_test)
        test_rmse = torch.sqrt(test_loss).item()
        print(f"\nTest RMSE: {test_rmse:.4f}")

    # Save the final model
    torch.save(model.state_dict(), "final_model.pth")
    print("Final model saved as 'final_model.pth'.")

# Train the best model on the full training set and evaluate on test set
train_on_full_data(RegressionNN, X_train, y_train, X_test, y_test, criterion, epochs=1000)

  model.load_state_dict(torch.load("best_model.pth"))


Epoch [100/1000], RMSE: 0.1261
Epoch [200/1000], RMSE: 0.1184
Epoch [300/1000], RMSE: 0.1158
Epoch [400/1000], RMSE: 0.1167
Epoch [500/1000], RMSE: 0.1175


KeyboardInterrupt: 

[0.11333410441875458]
[0.11041968315839767]