In [None]:
 !pip install pdf2image opencv-python-headless Pillow torch torchvision
!apt-get install -y poppler-utils
import cv2
from google.colab.patches import cv2_imshow  # For displaying images in Colab

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
poppler-utils is already the newest version (22.02.0-2ubuntu0.5).
0 upgraded, 0 newly installed, 0 to remove and 49 not upgraded.


PDF to Image Conversion

In [None]:
import os
import glob
from pdf2image import convert_from_path

# Convert PDFs in a folder to images
def extract_images_from_pdfs(folder_path, output_folder="/content/images"):
    os.makedirs(output_folder, exist_ok=True)
    pdf_files = sorted(glob.glob(os.path.join(folder_path, "*.pdf")))

    all_image_paths = []
    for pdf_file in pdf_files:
        pdf_name = os.path.splitext(os.path.basename(pdf_file))[0]
        images = convert_from_path(pdf_file)

        for i, img in enumerate(images):
            img_path = os.path.join(output_folder, f"{pdf_name}_page_{i+1}.png")
            img.save(img_path, "PNG")
            all_image_paths.append(img_path)

    return all_image_paths

# Run the function for your PDF folder path
pdf_folder_path = "/content/drive/MyDrive/Colab Notebooks/Design project/Dataset"
image_paths = extract_images_from_pdfs(pdf_folder_path)
print("Extracted images:", image_paths[:5])  # Display a few paths for confirmation


Extracted images: ['/content/images/202211003_page_1.png', '/content/images/202211005_page_1.png', '/content/images/202211007_page_1.png', '/content/images/202211008_page_1.png', '/content/images/202211009_page_1.png']


Image Preprocessing

In [None]:
import cv2

# Preprocess images (resize, threshold, etc.)
def preprocess_image(img_path):
    image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    image = cv2.resize(image, (128, 128))  # Resize for consistency
    _, binary_image = cv2.threshold(image, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    return binary_image

# Apply preprocessing to the first few images
processed_images = [preprocess_image(path) for path in image_paths[:5]]


Feature Encoding: Convert Handwriting Features to Binary-Coded Rows

In [None]:
import numpy as np

# Example of feature extraction function
def extract_features_from_image(binary_image):
    contours, _ = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    features = []

    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        baseline = "ascending" if y + h < binary_image.shape[0] / 2 else "descending" if y > binary_image.shape[0] / 2 else "leveled"
        connectivity = "strongly connected" if w / h > 1.5 else "medium connectivity" if w / h > 0.5 else "not connected"
        angle = np.arctan2(h, w) * 180 / np.pi
        slant = "vertical" if angle < 5 else "moderate right" if angle < 15 else "extreme right" if angle < 45 else "moderate left" if angle > 175 else "extreme left"
        area = cv2.contourArea(contour)
        pressure = "heavy" if area > 1000 else "medium" if area > 300 else "light"
        t_height = "not t"  # Logic for detecting 't'
        f_shape = "not f"  # Logic for detecting 'f'
        spacing = "evenly spaced"  # Logic to assess spacing

        features.append(generate_hm_row(baseline, connectivity, slant, pressure, t_height, f_shape, spacing))

    return features

def generate_hm_row(baseline, connectivity, slant, pressure, t_height, f_shape, spacing):
    baseline_code = 0 if baseline == "ascending" else 1 if baseline == "descending" else 2
    connectivity_code = 0 if connectivity == "strongly connected" else 1 if connectivity == "medium connectivity" else 2
    slant_code = 0 if slant == "vertical" else 1 if slant == "moderate right" else 2 if slant == "extreme right" else 3 if slant == "moderate left" else 4
    pressure_code = 0 if pressure == "light" else 1 if pressure == "medium" else 2
    t_height_code = 0 if t_height == "not t" else 1 if t_height == "very low" else 2 if t_height == "very high" else 3
    f_shape_code = 0 if f_shape == "not f" else 1
    spacing_code = 0 if spacing == "evenly spaced" else 1

    return [baseline_code, connectivity_code, slant_code, pressure_code, t_height_code, f_shape_code, spacing_code]

# Process images to extract features and create the handwriting map (HM)
handwriting_map = [extract_features_from_image(preprocess_image(path)) for path in image_paths]


Trait Extraction: Calculate and Normalize Big Five Trait Scores

In [None]:
import pandas as pd

# Load CSV with Big Five responses
file_path = '/content/drive/MyDrive/Colab Notebooks/Design project/Personality traits/Updated response.csv'
data = pd.read_csv(file_path)
data.columns = data.columns.str.strip()

# Define columns for each personality trait
trait_columns = {
    "Extraversion": ["I see myself as someone who is talkative.", "I see myself as someone who is full of energy.", "I see myself as someone who is outgoing, sociable."],
    "Agreeableness": ["I see myself as someone who is helpful and unselfish with others.", "I see myself as someone who is considerate and kind to almost everyone.", "I see myself as someone who likes to cooperate with others."],
    "Conscientiousness": ["I see myself as someone who does a thorough job.", "I see myself as someone who makes plans and follows through with them.", "I see myself as someone who does things efficiently."],
    "Neuroticism": ["I see myself as someone who worries a lot.", "I see myself as someone who gets nervous easily.", "I see myself as someone who can be moody."],
    "Openness": ["I see myself as someone who is original, comes up with new ideas.", "I see myself as someone who likes to reflect, play with ideas.", "I see myself as someone who is curious about many different things."]
}

# Calculate average scores and normalize
trait_averages = pd.DataFrame({trait: data[cols].mean(axis=1) for trait, cols in trait_columns.items()})
trait_averages_normalized = trait_averages / 5
print(trait_averages_normalized.head())


   Extraversion  Agreeableness  Conscientiousness  Neuroticism  Openness
0      0.600000       0.600000           0.600000     0.600000  0.600000
1      0.666667       0.600000           0.600000     0.800000  0.800000
2      0.466667       0.866667           0.866667     0.733333  1.000000
3      0.400000       0.466667           0.400000     1.000000  0.800000
4      0.333333       0.733333           1.000000     0.333333  0.933333


Dataset Preparation: Combine Features and Traits

In [None]:
import numpy as np
import torch
from torch.utils.data import Dataset

class HandwritingPersonalityDataset(Dataset):
    def __init__(self, handwriting_map, trait_scores, max_letters=70):
        self.handwriting_map = handwriting_map
        self.trait_scores = trait_scores.values  # Convert DataFrame to numpy array
        self.max_letters = max_letters
        self.feature_size = 7  # Each letter has 7 binary-coded features
        self.total_features = 1610

    def __len__(self):
        return len(self.handwriting_map)

    def __getitem__(self, idx):
        # Get handwriting features and ensure consistent shape
        handwriting_features = np.array(self.handwriting_map[idx])

        # Pad or truncate the handwriting features to max_letters (70)
        if handwriting_features.shape[0] < self.max_letters:
            padding = np.zeros((self.max_letters - handwriting_features.shape[0], self.feature_size))
            handwriting_features = np.vstack([handwriting_features, padding])
        elif handwriting_features.shape[0] > self.max_letters:
            handwriting_features = handwriting_features[:self.max_letters]

        # Flatten to a single vector of size 490
        handwriting_features = handwriting_features.flatten()

        # Pad to reach exactly 1610 elements
        if handwriting_features.size < self.total_features:
            handwriting_features = np.pad(handwriting_features, (0, self.total_features - handwriting_features.size), mode='constant')

        # Convert to tensor
        handwriting_features = torch.tensor(handwriting_features, dtype=torch.float32)
        traits = torch.tensor(self.trait_scores[idx], dtype=torch.float32)

        return handwriting_features, traits

# Create dataset instance
dataset = HandwritingPersonalityDataset(handwriting_map, trait_averages_normalized)


5. Model Building and Training

FFM-NN Architecture

In [None]:
import torch
import torch.nn as nn

class FFM_NN(nn.Module):
    def __init__(self):
        super(FFM_NN, self).__init__()
        self.fc1 = nn.Linear(1610, 1850)
        self.tanh = nn.Tanh()
        self.fc2 = nn.Linear(1850, 5)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.tanh(self.fc1(x))
        x = self.sigmoid(self.fc2(x))
        return x

# Instantiate the model
model = FFM_NN()


Model Training

In [None]:
import torch.optim as optim

# Define the loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.02, momentum=0.4)

# Training loop
epochs = 10
for epoch in range(epochs):
    for features, labels in dataset:
        # Ensure feature vector has correct shape
        features = features.view(1, -1)  # Reshape to (1, 1610) if needed

        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item()}")


  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1/10, Loss: 0.06542123109102249
Epoch 2/10, Loss: 0.07645122706890106
Epoch 3/10, Loss: 0.07874710857868195
Epoch 4/10, Loss: 0.07948936522006989
Epoch 5/10, Loss: 0.07990677654743195
Epoch 6/10, Loss: 0.0802365317940712
Epoch 7/10, Loss: 0.08052666485309601
Epoch 8/10, Loss: 0.0807863399386406
Epoch 9/10, Loss: 0.0810171514749527
Epoch 10/10, Loss: 0.0812198668718338


Save the model

In [None]:
# After training
torch.save(model.state_dict(), '/content/drive/MyDrive/Colab Notebooks/Design project/personality_traits_model.pth')


In [None]:
import torch
import cv2
import numpy as np
from pdf2image import convert_from_path

# Function to convert a PDF to images
def extract_images_from_pdf(pdf_path):
    images = convert_from_path(pdf_path)
    return images

# Function to preprocess an image for model input
def preprocess_image(img):
    # Resize and convert to grayscale
    img = cv2.resize(img, (128, 128))
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, binary_image = cv2.threshold(gray, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    return binary_image

# Function to extract features from a binary image and generate HM row
def extract_features_and_generate_hm_row(binary_image):
    # Implement feature extraction similar to training
    contours, _ = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # Placeholder for detected features
    baseline, connecting_strokes, slant, pressure, t_height, f_shape, spacing = (
        "leveled", "medium connectivity", "vertical", "medium", "not t", "not f", "evenly spaced"
    )

    # Here you'd normally extract actual feature values from the image
    # But for simplicity, let's assume these default values for this example

    # Generate HM row using the same encoding logic as during training
    hm_row = generate_hm_row(baseline, connecting_strokes, slant, pressure, t_height, f_shape, spacing)
    return hm_row

# Function to make predictions on a new PDF
def predict_personality_traits(model, pdf_path):
    # Convert PDF to images
    images = extract_images_from_pdf(pdf_path)
    hm_rows = []

    # Process each image page
    for img in images:
        binary_image = preprocess_image(np.array(img))
        hm_row = extract_features_and_generate_hm_row(binary_image)
        hm_rows.append(hm_row)

    # Stack all HM rows and pad to required input shape if needed
    # Ensure hm_rows is 2D and convert to tensor
    input_data = torch.tensor(hm_rows, dtype=torch.float32).view(1, -1)  # Reshape to match model input

    # Make sure input size is correct
    if input_data.shape[1] < 1610:
        padding = torch.zeros((1, 1610 - input_data.shape[1]))  # Zero-pad to match model input size
        input_data = torch.cat((input_data, padding), dim=1)
    elif input_data.shape[1] > 1610:
        input_data = input_data[:, :1610]  # Trim if it's longer

    # Model prediction
    model.eval()
    with torch.no_grad():
        prediction = model(input_data)

    # Convert prediction to numpy and print results
    traits = ["Extraversion", "Agreeableness", "Conscientiousness", "Neuroticism", "Openness"]
    trait_scores = prediction.numpy().flatten()
    result = dict(zip(traits, trait_scores))

    print("Predicted Personality Traits:")
    for trait, score in result.items():
        print(f"{trait}: {score:.2f}")

    return result

# Path to your trained model file
model_path = '/content/drive/MyDrive/Colab Notebooks/Design project/personality_traits_model.pth'
model = FFM_NN()  # Initialize your model class
model.load_state_dict(torch.load(model_path))  # Load model weights

# Predict personality traits from a PDF
pdf_path = '/content/drive/MyDrive/Colab Notebooks/Design project/Dataset/202211067.pdf'  # Path to the new PDF file
predict_personality_traits(model, pdf_path)


  model.load_state_dict(torch.load(model_path))  # Load model weights


Predicted Personality Traits:
Extraversion: 0.56
Agreeableness: 0.61
Conscientiousness: 0.58
Neuroticism: 0.57
Openness: 0.60


{'Extraversion': 0.5600668,
 'Agreeableness': 0.6127986,
 'Conscientiousness': 0.5787232,
 'Neuroticism': 0.5724024,
 'Openness': 0.6006152}

Accuracy test

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import torch

# Assuming `dataset` is a DataLoader with test data, similar to the training set
model.eval()  # Set model to evaluation mode
all_labels = []
all_predictions = []

with torch.no_grad():
    for features, labels in dataset:
        predictions = model(features).squeeze(0)  # Generate predictions
        all_labels.append(labels.numpy())         # Collect actual labels
        all_predictions.append(predictions.numpy()) # Collect predictions

# Convert lists to numpy arrays for metrics calculation
all_labels = np.vstack(all_labels)
all_predictions = np.vstack(all_predictions)

# Calculate MSE, MAE, and R-squared for each trait
mse = mean_squared_error(all_labels, all_predictions, multioutput='raw_values')
mae = mean_absolute_error(all_labels, all_predictions, multioutput='raw_values')
r2 = r2_score(all_labels, all_predictions, multioutput='raw_values')

# Print metrics for each trait
traits = ["Extraversion", "Agreeableness", "Conscientiousness", "Neuroticism", "Openness"]
for i, trait in enumerate(traits):
    print(f"{trait} - MSE: {mse[i]:.4f}, MAE: {mae[i]:.4f}, R2: {r2[i]:.4f}")


Extraversion - MSE: 0.0265, MAE: 0.1319, R2: 0.0376
Agreeableness - MSE: 0.0231, MAE: 0.1180, R2: 0.0322
Conscientiousness - MSE: 0.0215, MAE: 0.1139, R2: 0.0097
Neuroticism - MSE: 0.0387, MAE: 0.1606, R2: -0.0056
Openness - MSE: 0.0197, MAE: 0.1083, R2: 0.0213


In [None]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score

# Load the dataset
file_path = '/content/drive/MyDrive/Colab Notebooks/Design project/Personality traits/Updated response.csv'
data = pd.read_csv(file_path)

# Strip whitespace from column names
data.columns = data.columns.str.strip()

# Define columns for each personality trait
trait_columns = {
    "Extraversion": [
        "I see myself as someone who is talkative.",
        "I see myself as someone who is full of energy.",
        "I see myself as someone who is outgoing, sociable."
    ],
    "Agreeableness": [
        "I see myself as someone who is helpful and unselfish with others.",
        "I see myself as someone who is considerate and kind to almost everyone.",
        "I see myself as someone who likes to cooperate with others."
    ],
    "Conscientiousness": [
        "I see myself as someone who does a thorough job.",
        "I see myself as someone who makes plans and follows through with them.",
        "I see myself as someone who does things efficiently."
    ],
    "Neuroticism": [
        "I see myself as someone who worries a lot.",
        "I see myself as someone who gets nervous easily.",
        "I see myself as someone who can be moody."
    ],
    "Openness": [
        "I see myself as someone who is original, comes up with new ideas.",
        "I see myself as someone who likes to reflect, play with ideas.",
        "I see myself as someone who is curious about many different things."
    ]
}

# Calculate the average score for each trait
trait_averages = pd.DataFrame()
for trait, cols in trait_columns.items():
    trait_averages[trait] = data[cols].mean(axis=1)

# Normalize the averages to be between 0 and 1
trait_averages_normalized = trait_averages / 5

# Define the model class
class FFM_NN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(FFM_NN, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.relu = nn.Tanh()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.sigmoid(self.fc2(x))
        return x

# Initialize the model
input_size = 1610  # Update this with the actual number of input features
hidden_size = 1850
output_size = 5
model = FFM_NN(input_size, hidden_size, output_size)

# Load the trained model (assuming you saved it as 'personality_model.pth')
model.load_state_dict(torch.load('/content/drive/MyDrive/Colab Notebooks/Design project/personality_traits_model.pth'))
model.eval()

# Function to predict personality traits based on handwriting features
def predict_personality(features):
    features = torch.tensor(features, dtype=torch.float32)
    with torch.no_grad():
        outputs = model(features)
    return outputs.numpy()

# Example: Predict for some features (replace with actual handwriting features)
# Assuming you have a list of features for each sample (e.g., 1610 features for each sample)
sample_features = np.random.rand(1, 1610)  # Replace with actual feature data from your dataset
predictions = predict_personality(sample_features)

# Convert predictions to categories
def convert_to_category(score, thresholds):
    if score < thresholds[1]:
        return 0  # Low
    elif score < thresholds[2]:
        return 1  # Medium
    else:
        return 2  # High

# Define thresholds for each trait: Low, Medium, High
thresholds = {
    'Extraversion': [0.0, 0.33, 0.66, 1.0],  # Low, Medium, High
    'Agreeableness': [0.0, 0.33, 0.66, 1.0],
    'Conscientiousness': [0.0, 0.33, 0.66, 1.0],
    'Neuroticism': [0.0, 0.33, 0.66, 1.0],
    'Openness': [0.0, 0.33, 0.66, 1.0]
}

# Convert predicted traits to categories (treat predictions as scalars)
predicted_traits = {
    'Extraversion': predictions[0][0],  # Assuming predictions are scalar
    'Agreeableness': predictions[0][1],
    'Conscientiousness': predictions[0][2],
    'Neuroticism': predictions[0][3],
    'Openness': predictions[0][4]
}

# Convert the predictions and actual values into categories
predicted_categories = {trait: convert_to_category(pred, thresholds[trait]) for trait, pred in predicted_traits.items()}
actual_categories = {trait: convert_to_category(value, thresholds[trait]) for trait, value in trait_averages_normalized.iloc[0].items()}

# Calculate accuracy for each trait
accuracy = {trait: int(predicted_categories[trait] == actual_categories[trait]) for trait in predicted_categories}

print("Accuracy for each trait:")
for trait, acc in accuracy.items():
    print(f"{trait}: {acc}")


Accuracy for each trait:
Extraversion: 1
Agreeableness: 1
Conscientiousness: 1
Neuroticism: 1
Openness: 1


  model.load_state_dict(torch.load('/content/drive/MyDrive/Colab Notebooks/Design project/personality_traits_model.pth'))


In [None]:

# import torch
# from PIL import Image
# import torchvision.transforms as transforms
# from pdf2image import convert_from_path
# import os
# import shutil
# import pandas as pd

# # Load the original CSV with trait averages
# file_path = '/content/drive/MyDrive/Design Project/Updated response.csv'
# data = pd.read_csv(file_path)
# data.columns = data.columns.str.strip()  # Remove any extra whitespace

# # Define the trait columns for extraction
# trait_columns = {
#     "Extraversion": ["I see myself as someone who is talkative.", "I see myself as someone who is full of energy.", "I see myself as someone who is outgoing, sociable."],
#     "Agreeableness": ["I see myself as someone who is helpful and unselfish with others.", "I see myself as someone who is considerate and kind to almost everyone.", "I see myself as someone who likes to cooperate with others."],
#     "Conscientiousness": ["I see myself as someone who does a thorough job.", "I see myself as someone who makes plans and follows through with them.", "I see myself as someone who does things efficiently."],
#     "Neuroticism": ["I see myself as someone who worries a lot.", "I see myself as someone who gets nervous easily.", "I see myself as someone who can be moody."],
#     "Openness": ["I see myself as someone who is original, comes up with new ideas.", "I see myself as someone who likes to reflect, play with ideas.", "I see myself as someone who is curious about many different things."]
# }

# # Calculate the average score for each trait and normalize
# trait_averages = pd.DataFrame()
# for trait, cols in trait_columns.items():
#     trait_averages[trait] = data[cols].mean(axis=1)

# # Normalize the averages to be between 0 and 1
# trait_averages_normalized = trait_averages / 5

# # Get the list of labels for each person (ground truth)
# trait_labels = trait_averages_normalized.values.tolist()

# # Ensure each page in the PDF corresponds to a person in the CSV (assuming the pages are in the same order)
# # Extract the correct trait labels for testing purposes (assuming a 1:1 mapping between pages and individuals in the CSV)
# # The assumption here is that each PDF corresponds to one individual's traits (this may need to be adjusted based on your dataset).

# # Define the model class again if not already defined
# class HandwritingPersonalityModel(nn.Module):
#     def __init__(self):
#         super(HandwritingPersonalityModel, self).__init__()
#         self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)  # 3 channels for RGB images
#         self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
#         self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
#         self.fc1 = nn.Linear(128 * 28 * 28, 512)  # Adjust size based on input image dimensions
#         self.fc2 = nn.Linear(512, 5)  # 5 output neurons for Big Five Traits

#         self.relu = nn.ReLU()
#         self.maxpool = nn.MaxPool2d(2, 2)

#     def forward(self, x):
#         x = self.relu(self.conv1(x))
#         x = self.maxpool(x)
#         x = self.relu(self.conv2(x))
#         x = self.maxpool(x)
#         x = self.relu(self.conv3(x))
#         x = self.maxpool(x)
#         x = x.view(x.size(0), -1)  # Flatten the tensor
#         x = self.relu(self.fc1(x))
#         x = self.fc2(x)  # Output 5 trait predictions
#         return x

# # Load the trained model
# model_path = "/content/drive/MyDrive/Design Project/model_state.pth"  # Path where the model is saved
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model = HandwritingPersonalityModel()
# model.load_state_dict(torch.load(model_path, map_location=device))
# model.to(device)
# model.eval()  # Set model to evaluation mode

# # Define the preprocessing function
# def preprocess_image(img_path):
#     preprocess = transforms.Compose([
#         transforms.Resize((224, 224)),  # Resize to 224x224
#         transforms.ToTensor(),         # Convert image to tensor
#         transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize
#     ])
#     image = Image.open(img_path).convert("RGB")  # Ensure image is in RGB mode
#     return preprocess(image).unsqueeze(0)  # Add batch dimension


# # Function to extract images from PDF
# def extract_images_from_pdf(pdf_path, output_folder="temp_images"):
#     if os.path.exists(output_folder):
#         shutil.rmtree(output_folder)
#     os.makedirs(output_folder, exist_ok=True)

#     images = convert_from_path(pdf_path)
#     image_paths = []
#     for i, image in enumerate(images):
#         img_path = os.path.join(output_folder, f"page_{i+1}.png")
#         image.save(img_path, "PNG")
#         image_paths.append(img_path)
#     return image_paths


# # Function to generate predictions and compare them with the true labels (trait_averages_normalized)
# def predict_traits_from_pdf(pdf_path, ground_truth_labels):
#     # Extract images from the PDF
#     image_paths = extract_images_from_pdf(pdf_path)
#     predictions_list = []
#     ground_truth_dict = {}

#     for i, img_path in enumerate(image_paths):
#         # Preprocess the image
#         image_tensor = preprocess_image(img_path).to(device)

#         # Make predictions
#         with torch.no_grad():
#             output = model(image_tensor)

#         # Convert the output to CPU and extract the numpy array
#         predictions = output.cpu().numpy()[0]

#         # Map predictions to Big Five traits
#         traits = ["Extraversion", "Agreeableness", "Conscientiousness", "Neuroticism", "Openness"]
#         predicted_traits = dict(zip(traits, predictions))
#         predictions_list.append(predicted_traits)

#         # Compare with the corresponding ground truth (labels)
#         if i < len(ground_truth_labels):  # Ensure we don't go out of bounds
#             ground_truth_dict[f"Page {i+1}"] = {
#                 "Predicted": predicted_traits,
#                 "Ground Truth": ground_truth_labels[i]
#             }

#     return ground_truth_dict

# # Example usage
# pdf_path = "/content/drive/MyDrive/Design Project/Dataset/202211008.pdf"  # Path to a new PDF
# ground_truth_comparison = predict_traits_from_pdf(pdf_path, trait_labels)

# # Print the comparison of predicted and actual traits for each page
# for page, comparison in ground_truth_comparison.items():
#     print(f"{page}:")
#     print(f"  Predicted Traits: {comparison['Predicted']}")
#     print(f"  Ground Truth: {comparison['Ground Truth']}")


In [1]:

import graphviz

# Create a Graphviz object
dot = graphviz.Digraph(format='png', engine='dot')

# Add nodes for the various parts of the model
dot.node('A', 'Handwriting Data\n(Handwritten Images)')
dot.node('B', 'Personality Data\n(Big Five Trait Scores)')
dot.node('C', 'Handwriting Feature\nExtraction (ResNet-50)')
dot.node('D', 'Personality Trait\nProcessing (Fully Connected Network)')
dot.node('E', 'Concatenate Handwriting\nFeatures + Personality Traits')
dot.node('F', 'Final Fully Connected\nLayer (Prediction Layer)')
dot.node('G', 'Output: 5 Big Five\nPersonality Traits')

# Add edges between the nodes to represent the flow
dot.edge('A', 'C', label='Extract Features (CNN)')
dot.edge('B', 'D', label='Process Traits (FCN)')
dot.edge('C', 'E', label='Concatenate Features')
dot.edge('D', 'E', label='Concatenate Features')
dot.edge('E', 'F', label='Feed into Fully Connected Layer')
dot.edge('F', 'G', label='Output Prediction')

# Render and display the diagram
dot.render('personality_prediction_diagram')

# Display the diagram (if running in Jupyter notebook)
from IPython.display import Image
Image(filename='personality_prediction_diagram.png')

ExecutableNotFound: failed to execute WindowsPath('dot'), make sure the Graphviz executables are on your systems' PATH