In [61]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertModel
import warnings
import sklearn.base
warnings.filterwarnings("ignore", category=UserWarning, module="sklearn.base")


In [62]:
class TherapyDataset(Dataset):
    def __init__(self, dataframe, tokenizer, max_len):
        self.data = dataframe.reset_index(drop=True)
        self.tokenizer = tokenizer
        self.max_len = max_len

        # Encode categorical variables
        self.gender_encoder = LabelEncoder()
        self.data['gender_encoded'] = self.gender_encoder.fit_transform(self.data['Gender'])

        # Encode Music and Meditation Therapies
        self.music_therapy_encoder = LabelEncoder()
        self.meditation_therapy_encoder = LabelEncoder()
        self.data['music_therapy_encoded'] = self.music_therapy_encoder.fit_transform(self.data['Music Therapy'].fillna('None'))
        self.data['meditation_therapy_encoded'] = self.meditation_therapy_encoder.fit_transform(self.data['Meditation Therapy'].fillna('None'))

        # Scale numerical features
        self.scaler = StandardScaler()
        self.data[['Age', 'Stress Level']] = self.scaler.fit_transform(self.data[['Age', 'Stress Level']])

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        age = self.data.loc[idx, 'Age']
        gender = self.data.loc[idx, 'gender_encoded']
        stress_level = self.data.loc[idx, 'Stress Level']
        text = f"Age {age:.2f} Gender {gender} Stress {stress_level:.2f}"
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'music_therapy_label': torch.tensor(self.data.loc[idx, 'music_therapy_encoded'], dtype=torch.long),
            'meditation_therapy_label': torch.tensor(self.data.loc[idx, 'meditation_therapy_encoded'], dtype=torch.long),
            'age': torch.tensor(age, dtype=torch.float),
            'gender': torch.tensor(gender, dtype=torch.long),
            'stress_level': torch.tensor(stress_level, dtype=torch.float)
        }


In [63]:
class TherapyRecommendationModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, music_therapy_classes, meditation_therapy_classes):
        super().__init__()
        text_embed_dim = embedding_dim
        feature_embed_dim = embedding_dim // 4

        self.text_embedding = nn.Embedding(vocab_size, text_embed_dim)
        self.age_embedding = nn.Linear(1, feature_embed_dim)
        self.gender_embedding = nn.Embedding(2, feature_embed_dim)
        self.stress_embedding = nn.Linear(1, feature_embed_dim)
        self.feature_projector = nn.Linear(feature_embed_dim * 3, text_embed_dim)
        self.feature_fusion = nn.TransformerEncoderLayer(
            d_model=text_embed_dim,
            nhead=4,
            dim_feedforward=text_embed_dim * 2,
            dropout=0.1,
            batch_first=True
        )
        self.music_therapy_classifier = nn.Sequential(
            nn.Linear(text_embed_dim, text_embed_dim // 2),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(text_embed_dim // 2, music_therapy_classes)
        )
        self.meditation_therapy_classifier = nn.Sequential(
            nn.Linear(text_embed_dim, text_embed_dim // 2),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(text_embed_dim // 2, meditation_therapy_classes)
        )

    def forward(self, input_ids, age, gender, stress_level):
        text_embed = self.text_embedding(input_ids)
        age_embed = self.age_embedding(age.unsqueeze(1))
        gender_embed = self.gender_embedding(gender)
        stress_embed = self.stress_embedding(stress_level.unsqueeze(1))
        additional_features = torch.cat([age_embed, gender_embed, stress_embed], dim=1)
        projected_features = self.feature_projector(additional_features).unsqueeze(1)
        combined_features = torch.cat([text_embed, projected_features], dim=1)
        fused_features = self.feature_fusion(combined_features)
        pooled_features = fused_features[:, 0]
        music_therapy_logits = self.music_therapy_classifier(pooled_features)
        meditation_therapy_logits = self.meditation_therapy_classifier(pooled_features)
        return music_therapy_logits, meditation_therapy_logits


In [64]:
def train_model(model, train_loader, val_loader, criterion, optimizer, epochs, device):
    best_val_loss = float('inf')
    for epoch in range(epochs):
        model.train()
        total_train_loss = 0
        for batch in train_loader:
            optimizer.zero_grad()
            input_ids = batch['input_ids'].to(device)
            music_labels = batch['music_therapy_label'].to(device)
            meditation_labels = batch['meditation_therapy_label'].to(device)
            age = batch['age'].to(device)
            gender = batch['gender'].to(device)
            stress_level = batch['stress_level'].to(device)
            music_logits, meditation_logits = model(input_ids, age, gender, stress_level)
            music_loss = criterion(music_logits, music_labels)
            meditation_loss = criterion(meditation_logits, meditation_labels)
            total_loss = music_loss + meditation_loss
            total_loss.backward()
            optimizer.step()
            total_train_loss += total_loss.item()
        model.eval()
        total_val_loss = 0
        with torch.no_grad():
            for batch in val_loader:
                input_ids = batch['input_ids'].to(device)
                music_labels = batch['music_therapy_label'].to(device)
                meditation_labels = batch['meditation_therapy_label'].to(device)
                age = batch['age'].to(device)
                gender = batch['gender'].to(device)
                stress_level = batch['stress_level'].to(device)
                music_logits, meditation_logits = model(input_ids, age, gender, stress_level)
                music_loss = criterion(music_logits, music_labels)
                meditation_loss = criterion(meditation_logits, meditation_labels)
                total_loss = music_loss + meditation_loss
                total_val_loss += total_loss.item()
        avg_train_loss = total_train_loss / len(train_loader)
        avg_val_loss = total_val_loss / len(val_loader)
        print(f"Epoch {epoch+1}: Train Loss: {avg_train_loss} Val Loss: {avg_val_loss}")
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            torch.save({
                'model_state_dict': model.state_dict(),
                'music_therapy_encoder': train_loader.dataset.music_therapy_encoder,
                'meditation_therapy_encoder': train_loader.dataset.meditation_therapy_encoder
            }, 'best_therapy_model.pth')
    return best_val_loss


In [65]:
def save_preprocessing_objects(train_dataset, model_path='preprocessing_objects.pkl'):
    
    preprocessing_objects = {
        'music_therapy_encoder': train_dataset.music_therapy_encoder,
        'meditation_therapy_encoder': train_dataset.meditation_therapy_encoder,
        'gender_encoder': train_dataset.gender_encoder,
        'scaler': train_dataset.scaler
    }
    with open(model_path, 'wb') as f:
        pickle.dump(preprocessing_objects, f)
    print(f"Preprocessing objects saved to {model_path}")


In [66]:
def main():
    BATCH_SIZE = 32
    MAX_LEN = 128
    EMBEDDING_DIM = 256
    EPOCHS = 10
    LEARNING_RATE = 1e-4
    try:
        df = pd.read_csv('Extended_Sleep_Health_Dataset.csv')  
    except FileNotFoundError:
        print("Dataset file not found. Please provide the correct path.")
        return
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    VOCAB_SIZE = len(tokenizer.vocab)
    MUSIC_THERAPY_CLASSES = df['Music Therapy'].nunique()
    MEDITATION_THERAPY_CLASSES = df['Meditation Therapy'].nunique()
    train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)
    train_dataset = TherapyDataset(train_df, tokenizer, MAX_LEN)
    val_dataset = TherapyDataset(val_df, tokenizer, MAX_LEN)
    save_preprocessing_objects(train_dataset)
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)
    print("Number of training samples:", len(train_df))
    print("Number of validation (test) samples:", len(val_df))
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")
    model = TherapyRecommendationModel(
        vocab_size=VOCAB_SIZE,
        embedding_dim=EMBEDDING_DIM,
        music_therapy_classes=MUSIC_THERAPY_CLASSES,
        meditation_therapy_classes=MEDITATION_THERAPY_CLASSES
    ).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
    best_loss = train_model(
        model,
        train_loader,
        val_loader,
        criterion,
        optimizer,
        EPOCHS,
        device
    )
    print(f"Best Validation Loss: {best_loss}")


In [67]:
def recommend_therapies(age, gender, stress_level, model_path='best_therapy_model.pth', preprocessor_path='Documents/latest proj/preprocessing_objects.pkl'):
    with open(preprocessor_path, 'rb') as f:
        preprocessors = pickle.load(f)
    checkpoint = torch.load(model_path)
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = TherapyRecommendationModel(
        vocab_size=len(tokenizer.vocab),
        embedding_dim=256,
        music_therapy_classes=preprocessors['music_therapy_encoder'].classes_.shape[0],
        meditation_therapy_classes=preprocessors['meditation_therapy_encoder'].classes_.shape[0]
    ).to(device)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()
    age_scaled = preprocessors['scaler'].transform([[age, stress_level]])[0][0]
    stress_scaled = preprocessors['scaler'].transform([[age, stress_level]])[0][1]
    gender_encoded = preprocessors['gender_encoder'].transform([gender])[0]
    text = f"Age {age_scaled:.2f} Gender {gender_encoded} Stress {stress_scaled:.2f}"
    encoding = tokenizer.encode_plus(
        text,
        add_special_tokens=True,
        max_length=128,
        padding='max_length',
        truncation=True,
        return_tensors='pt'
    )
    input_ids = encoding['input_ids'].to(device)
    age_tensor = torch.tensor([age_scaled], dtype=torch.float).to_device()
    gender_tensor = torch.tensor([gender_encoded], dtype=torch.long).to(device)
    stress_tensor = torch.tensor([stress_scaled], dtype=torch.float).to(device)
    music_logits, meditation_logits = model(
        input_ids,
        age_tensor,
        gender_tensor,
        stress_tensor
    )
    music_pred = torch.argmax(music_logits, dim=1).cpu().numpy()[0]
    meditation_pred = torch.argmax(meditation_logits, dim=1).cpu().numpy()[0]
    recommended_music = preprocessors['music_therapy_encoder'].inverse_transform([music_pred])[0]
    recommended_meditation = preprocessors['meditation_therapy_encoder'].inverse_transform([meditation_pred])[0]
    return recommended_music, recommended_meditation


In [68]:
if __name__ == "__main__":
    main()


Preprocessing objects saved to preprocessing_objects.pkl
Number of training samples: 859
Number of validation (test) samples: 215
Using device: cpu
Epoch 1: Train Loss: 2.2021958651366056 Val Loss: 2.216433048248291
Epoch 2: Train Loss: 2.2145985585671886 Val Loss: 2.2052130017961775
Epoch 3: Train Loss: 2.213670006504765 Val Loss: 2.185236385890416
Epoch 4: Train Loss: 2.207351057617753 Val Loss: 2.1999738216400146
Epoch 5: Train Loss: 2.205964273876614 Val Loss: 2.2010960238320485
Epoch 6: Train Loss: 2.200114294334694 Val Loss: 2.200970479420253
Epoch 7: Train Loss: 2.19952834977044 Val Loss: 2.2168294361659457
Epoch 8: Train Loss: 2.1848128813284413 Val Loss: 2.191601072038923
Epoch 9: Train Loss: 2.1892907354566784 Val Loss: 2.2234110151018416
Epoch 10: Train Loss: 2.19182077160588 Val Loss: 2.1935675144195557
Best Validation Loss: 2.185236385890416


In [69]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertModel
import warnings
import sklearn.base
warnings.filterwarnings("ignore", category=UserWarning, module="sklearn.base")

class TherapyDataset(Dataset):
    def __init__(self, dataframe, tokenizer, max_len):
        self.data = dataframe.reset_index(drop=True)
        self.tokenizer = tokenizer
        self.max_len = max_len

        # Encode categorical variables
        self.gender_encoder = LabelEncoder()
        self.data['gender_encoded'] = self.gender_encoder.fit_transform(self.data['Gender'])

        # Encode Music and Meditation Therapies
        self.music_therapy_encoder = LabelEncoder()
        self.meditation_therapy_encoder = LabelEncoder()
        self.data['music_therapy_encoded'] = self.music_therapy_encoder.fit_transform(self.data['Music Therapy'].fillna('None'))
        self.data['meditation_therapy_encoded'] = self.meditation_therapy_encoder.fit_transform(self.data['Meditation Therapy'].fillna('None'))

        # Scale numerical features
        self.scaler = StandardScaler()
        self.data[['Age', 'Stress Level']] = self.scaler.fit_transform(self.data[['Age', 'Stress Level']])

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        age = self.data.loc[idx, 'Age']
        gender = self.data.loc[idx, 'gender_encoded']
        stress_level = self.data.loc[idx, 'Stress Level']
        text = f"Age {age:.2f} Gender {gender} Stress {stress_level:.2f}"
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'music_therapy_label': torch.tensor(self.data.loc[idx, 'music_therapy_encoded'], dtype=torch.long),
            'meditation_therapy_label': torch.tensor(self.data.loc[idx, 'meditation_therapy_encoded'], dtype=torch.long),
            'age': torch.tensor(age, dtype=torch.float),
            'gender': torch.tensor(gender, dtype=torch.long),
            'stress_level': torch.tensor(stress_level, dtype=torch.float)
        }

class TherapyRecommendationModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, music_therapy_classes, meditation_therapy_classes):
        super().__init__()
        text_embed_dim = embedding_dim
        feature_embed_dim = embedding_dim // 4

        self.text_embedding = nn.Embedding(vocab_size, text_embed_dim)
        self.age_embedding = nn.Linear(1, feature_embed_dim)
        self.gender_embedding = nn.Embedding(2, feature_embed_dim)
        self.stress_embedding = nn.Linear(1, feature_embed_dim)
        self.feature_projector = nn.Linear(feature_embed_dim * 3, text_embed_dim)
        self.feature_fusion = nn.TransformerEncoderLayer(
            d_model=text_embed_dim,
            nhead=4,
            dim_feedforward=text_embed_dim * 2,
            dropout=0.1,
            batch_first=True
        )
        self.music_therapy_classifier = nn.Sequential(
            nn.Linear(text_embed_dim, text_embed_dim // 2),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(text_embed_dim // 2, music_therapy_classes)
        )
        self.meditation_therapy_classifier = nn.Sequential(
            nn.Linear(text_embed_dim, text_embed_dim // 2),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(text_embed_dim // 2, meditation_therapy_classes)
        )

    def forward(self, input_ids, age, gender, stress_level):
        text_embed = self.text_embedding(input_ids)
        age_embed = self.age_embedding(age.unsqueeze(1))
        gender_embed = self.gender_embedding(gender)
        stress_embed = self.stress_embedding(stress_level.unsqueeze(1))
        additional_features = torch.cat([age_embed, gender_embed, stress_embed], dim=1)
        projected_features = self.feature_projector(additional_features).unsqueeze(1)
        combined_features = torch.cat([text_embed, projected_features], dim=1)
        fused_features = self.feature_fusion(combined_features)
        pooled_features = fused_features[:, 0]
        music_therapy_logits = self.music_therapy_classifier(pooled_features)
        meditation_therapy_logits = self.meditation_therapy_classifier(pooled_features)
        return music_therapy_logits, meditation_therapy_logits

def recommend_therapies(age, gender, stress_level, model_path='best_therapy_model.pth', preprocessor_path='preprocessing_objects.pkl'):
    # Load preprocessors and model
    with open(preprocessor_path, 'rb') as f:
        preprocessors = pickle.load(f)
    
    # Load the trained model checkpoint
    checkpoint = torch.load(model_path)
    
    # Prepare tokenizer
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    
    # Set up device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    
    # Prepare model
    model = TherapyRecommendationModel(
        vocab_size=len(tokenizer.vocab), 
        embedding_dim=256, 
        music_therapy_classes=len(preprocessors['music_therapy_encoder'].classes_),
        meditation_therapy_classes=len(preprocessors['meditation_therapy_encoder'].classes_)
    ).to(device)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()
    
    # Scale and encode inputs
    age_stress_scaled = preprocessors['scaler'].transform([[age, stress_level]])[0]
    age_scaled, stress_scaled = age_stress_scaled[0], age_stress_scaled[1]
    
    gender_encoded = preprocessors['gender_encoder'].transform([gender])[0]
    
    # Prepare text input
    text = f"Age {age_scaled:.2f} Gender {gender_encoded} Stress {stress_scaled:.2f}"
    
    # Tokenize input
    encoding = tokenizer.encode_plus(
        text, 
        add_special_tokens=True, 
        max_length=128, 
        padding='max_length', 
        truncation=True, 
        return_tensors='pt'
    )
    
    # Prepare tensors
    input_ids = encoding['input_ids'].to(device)
    age_tensor = torch.tensor([age_scaled], dtype=torch.float).to(device)
    gender_tensor = torch.tensor([gender_encoded], dtype=torch.long).to(device)
    stress_tensor = torch.tensor([stress_scaled], dtype=torch.float).to(device)
    
    # Get predictions
    with torch.no_grad():
        music_logits, meditation_logits = model(
            input_ids, 
            age_tensor, 
            gender_tensor, 
            stress_tensor
        )
    
    # Get top 2 music therapies
    music_probs = torch.softmax(music_logits, dim=1).cpu().numpy()[0]
    music_top_indices = music_probs.argsort()[-2:][::-1]
    music_therapies = preprocessors['music_therapy_encoder'].inverse_transform(music_top_indices)
    music_confidences = music_probs[music_top_indices]
    
    # Get top 2 meditation therapies
    meditation_probs = torch.softmax(meditation_logits, dim=1).cpu().numpy()[0]
    meditation_top_indices = meditation_probs.argsort()[-2:][::-1]
    meditation_therapies = preprocessors['meditation_therapy_encoder'].inverse_transform(meditation_top_indices)
    meditation_confidences = meditation_probs[meditation_top_indices]
    
    return (
        list(zip(music_therapies, music_confidences)), 
        list(zip(meditation_therapies, meditation_confidences))
    )

def get_user_input():
    while True:
        try:
            print("\nTherapy Recommendation System")
            print("-" * 30)
            
            # Age input
            while True:
                age = input("Enter your age (16-36): ")
                try:
                    age = int(age)
                    if 16 <= age <= 36:
                        break
                    else:
                        print("Please enter a valid age between 16 and 36.")
                except ValueError:
                    print("Please enter a valid number.")
            
            # Gender input
            while True:
                gender = input("Enter your gender (Male/Female): ").strip().capitalize()
                if gender in ['Male', 'Female']:
                    break
                else:
                    print("Please enter either 'Male' or 'Female'.")
            
            # Stress level input
            while True:
                stress_level = input("Enter your stress level (1-10): ")
                try:
                    stress_level = int(stress_level)
                    if 1 <= stress_level <= 10:
                        break
                    else:
                        print("Please enter a stress level between 1 and 10.")
                except ValueError:
                    print("Please enter a valid number.")
            
            # Get recommendations
            music_recommendations, meditation_recommendations = recommend_therapies(age, gender, stress_level)
            
            # Display results
            print("\n--- Recommended Therapies ---")
            print("Top Music Therapies:")
            for therapy, confidence in music_recommendations:
                print(f"- {therapy} (Confidence: {confidence*100:.2f}%)")
            
            print("\nTop Meditation Therapies:")
            for therapy, confidence in meditation_recommendations:
                print(f"- {therapy} (Confidence: {confidence*100:.2f}%)")
            
            # Ask if user wants to continue
            continue_choice = input("\nDo you want to get another recommendation? (yes/no): ").lower()
            if continue_choice != 'yes':
                break
        
        except Exception as e:
            print(f"An error occurred: {e}")
            break

# Run the recommendation system
if __name__ == "__main__":
    get_user_input()


Therapy Recommendation System
------------------------------


Enter your age (16-36):  33
Enter your gender (Male/Female):  male
Enter your stress level (1-10):  5


  checkpoint = torch.load(model_path)



--- Recommended Therapies ---
Top Music Therapies:
- Singing: Purpose: to enhance brain activity (Confidence: 35.09%)
- Listening: Purpose: for overall relaxation (Confidence: 33.64%)

Top Meditation Therapies:
- Breathing Meditation: Purpose: to manage daily stress and improve emotional regulation; Technique: Balanced breath work with gentle visualization (Confidence: 37.78%)
- Body Scan Meditation: Purpose: to identify and address subtle stress indicators; Technique: Systematic muscle and emotional relaxation (Confidence: 32.25%)



Do you want to get another recommendation? (yes/no):  no
