In [1]:
# Importing the required libraries (install in your environment first)
import numpy as np
import pandas as pd
from math import log2, sqrt
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

import plotly.express as px
import plotly.graph_objects as go

## Import Data to Dataframe

In [2]:
data_filePath = 'QRNGvsPRNG_TrainingData.txt'
df = pd.read_csv(data_filePath, sep=' ',header=None, dtype={"data": str, "label": np.int64})
df.columns = ["data", "label"]
df.head()

Unnamed: 0,data,label
0,0000101000100111111110011011110111101101010111...,1
1,0100101111010000110010000101001110101001001010...,1
2,1000101010100100011100101111011111001110011101...,1
3,0111101100010110010000011111111001110001100110...,1
4,1111100000011110111111111111101001100100011010...,1


### Process Labels and Train_Test_split

In [3]:
df['label'].value_counts()

label
1    12000
2    12000
Name: count, dtype: int64

In [4]:
df['label'] = df['label'].apply(lambda x: x-1)

In [67]:
from sklearn.model_selection import train_test_split
X = df['data'].values
y = df['label'].values

# Split the each string of X into a list of characters, and convert them to integers
X = np.array([list(map(int, list(x))) for x in X])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, shuffle=True, random_state=42)
y_train = y_train.reshape(-1, 1)
y_val = y_val.reshape(-1, 1)
y_test = y_test.reshape(-1, 1)

In [68]:
(X_train.shape, y_train.shape), (X_val.shape, y_val.shape), (X_test.shape, y_test.shape)

(((17280, 100), (17280, 1)),
 ((1920, 100), (1920, 1)),
 ((4800, 100), (4800, 1)))

In [69]:
X_train[10], y_train[10]

(array([0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0,
        1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0,
        0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0,
        1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1,
        1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1]),
 array([1]))

## Train Classifier

In [70]:
y_train

array([[0],
       [0],
       [0],
       ...,
       [0],
       [1],
       [0]])

In [71]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset

In [105]:
class RNClassifier(nn.Module):
    def __init__(self):
        super(RNClassifier, self).__init__()
        self.dropout = nn.Dropout(0.5)
        
        self.fc1 = nn.Linear(100, 64)
        self.bn1 = nn.BatchNorm1d(64)
        
        self.fc2 = nn.Linear(64, 64)
        self.bn2 = nn.BatchNorm1d(64)
        
        self.fc3 = nn.Linear(64, 64)   
        self.bn3 = nn.BatchNorm1d(64)
    
        self.fc5 = nn.Linear(64, 1)

    def forward(self, x):
        x = self.fc1(x)
        x = self.bn1(x)
        x = F.relu(x)
        x = self.dropout(x)
        
        x = self.fc2(x)
        x = self.bn2(x)
        x = F.relu(x)
        x = self.dropout(x)
        
        # x = self.fc3(x)
        # x = self.bn3(x)
        # x = F.relu(x)
        
        x = self.fc5(x)
        # return x
        return torch.sigmoid(x)

model = RNClassifier()
# Split the data into training and validation sets

In [108]:
import torch.optim as optim

# criterion = nn.CrossEntropyLoss()
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.00001)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=100, verbose=True, factor=0.5)

epochs = 10000
bactch_size = 4
patience =100

X_train = torch.tensor(X_train).float()
y_train = torch.tensor(y_train).float()
X_val = torch.tensor(X_val).float()
y_val = torch.tensor(y_val).float()

train_data = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_data, batch_size=bactch_size, shuffle=True)

val_data = TensorDataset(X_val, y_val)
val_loader = DataLoader(val_data, batch_size=bactch_size, shuffle=True)

  X_train = torch.tensor(X_train).float()
  y_train = torch.tensor(y_train).float()
  X_val = torch.tensor(X_val).float()
  y_val = torch.tensor(y_val).float()


In [110]:
best_val_loss = np.inf
patience_counter = 0
best_val_accuracy = 0
best_accu_epoch = 0

for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        
    avg_train_loss = running_loss / len(train_loader)
    
    print(f'Epoch: {epoch}, Loss: {avg_train_loss}')
    
    if epoch % 5 == 0:
        model.eval()
        total_loss = 0.0
        accurate = 0
        
        with torch.no_grad():
            for i, data in enumerate(val_loader, 0):
                inputs, labels = data
                outputs = model(inputs)
                # print(outputs)
                # output = torch.argmax(outputs, 1)
                output = torch.round(outputs)
                accurate += torch.sum(output == labels).item()
                
                val_loss = criterion(outputs, labels)
                total_loss += val_loss.item()
        
        avg_val_loss = total_loss / len(val_loader)
        accuracy = accurate / len(y_val)
        
        # scheduler.step(avg_val_loss)
        
        print(f'Epoch: {epoch}, Validation Loss:{avg_val_loss}, Accuracy: {accuracy}, accurate: {accurate}, total: {len(y_val)}')
        
        # Check if validation loss improved
        if accuracy > best_val_accuracy:
            best_val_accuracy = accuracy
            patience_counter = 0  # Reset counter if validation loss improved
            best_accu_epoch = epoch
            torch.save(model.state_dict(), f'model/best_model_epoch{epoch}.pth')  # Save the best model
        else:
            patience_counter += 1
            print(f"Validation accuracy did not improve for {patience_counter} epochs.")
        
        #  # Early stopping
        if patience_counter >= patience:
            print(f"Early stopping triggered after {patience} epochs with no improvement. Best accuracy: {best_val_accuracy} at epoch {best_accu_epoch}")
            break
                

Epoch: 0, Loss: 0.6836093868922304
Epoch: 0, Validation Loss:0.6947738741834958, Accuracy: 0.5145833333333333, accurate: 988, total: 1920
Epoch: 1, Loss: 0.6845677883812675
Epoch: 2, Loss: 0.6825451243530821
Epoch: 3, Loss: 0.6818700810480449
Epoch: 4, Loss: 0.6831432122185275
Epoch: 5, Loss: 0.6841333798926186
Epoch: 5, Validation Loss:0.6943737986187141, Accuracy: 0.5166666666666667, accurate: 992, total: 1920
Epoch: 6, Loss: 0.6831708277541179
Epoch: 7, Loss: 0.6813423861025109
Epoch: 8, Loss: 0.6834004921493707
Epoch: 9, Loss: 0.6839415942767152
Epoch: 10, Loss: 0.685672658123076
Epoch: 10, Validation Loss:0.6927746662249168, Accuracy: 0.515625, accurate: 990, total: 1920
Validation accuracy did not improve for 1 epochs.
Epoch: 11, Loss: 0.6843576080821179
Epoch: 12, Loss: 0.6829402519734922
Epoch: 13, Loss: 0.6846776731726196
Epoch: 14, Loss: 0.6825897476325432
Epoch: 15, Loss: 0.6837543761426652
Epoch: 15, Validation Loss:0.6938212599605322, Accuracy: 0.5151041666666667, accurate

KeyboardInterrupt: 

In [23]:
#Testing the model
from sklearn.svm import SVC
m = SVC(gamma=2, C=1, random_state=42)



Accuracy of the network on the test data: 50.1875%
