# In [None]:
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics.pairwise import cosine_similarity
import warnings
warnings.filterwarnings('ignore')

class TTMClassifier:
    """
    TTM stage classifier: SentenceTransformer embeddings + Logistic Regression.

    Texts are embedded with ``self.embedder`` and the embeddings are classified
    by ``self.classifier``. Stage names are the training labels; a separate
    ordinal mapping orders the stages from precontemplation to maintenance.
    """

    def __init__(self, model_name="all-MiniLM-L6-v2"):
        """
        TTM Stages classifier using SentenceTransformer embeddings + Logistic Regression

        Ordinal stages:
        0 = Precontemplation
        1 = Contemplation
        2 = Preparation
        3 = Action
        4 = Maintenance

        Args:
            model_name: Name of the sentence transformer model to use
        """
        print(f"Loading sentence transformer model: {model_name}")
        self.embedder = SentenceTransformer(model_name)

        # Use logistic regression for better probability alignment.
        # `multi_class` is intentionally not passed: it is deprecated in
        # scikit-learn (since 1.5) and the lbfgs solver already fits a
        # multinomial model whenever there are three or more classes.
        self.classifier = LogisticRegression(
            solver='lbfgs',
            class_weight='balanced',
            max_iter=1000,
            random_state=42
        )

        # Define ordinal mapping for TTM stages
        self.stage_to_ordinal = {
            "precontemplation": 0,
            "contemplation": 1,
            "preparation": 2,
            "action": 3,
            "maintenance": 4
        }

        self.ordinal_to_stage = {v: k for k, v in self.stage_to_ordinal.items()}
        # Both encoders are filled in by train(): stage name <-> integer class id
        self.label_encoder = None
        self.inverse_encoder = None
        self.is_trained = False

    def train(self, train_data, verbose=True):
        """
        Train the classifier on the provided data

        Stage names that are missing from the ordinal mapping are appended to
        it with the next free ordinal value (a warning is printed).

        Args:
            train_data: Dict with format {"stage_name": [texts...]}. A bare
                string given for a stage is treated as one example.
            verbose: Whether to print training progress

        Returns:
            self, so that construction and training can be chained.

        Raises:
            ValueError: If train_data is empty or holds no texts at all.
        """
        if not train_data:
            raise ValueError("train_data must map at least one stage name to example texts")

        if verbose:
            print("Starting training...")
            print(f"Using {self.classifier.__class__.__name__} classifier")

        # Prepare training data
        texts = []
        labels = []

        for stage_name, stage_texts in train_data.items():
            # list.extend() on a lone string would add it character by character
            if isinstance(stage_texts, str):
                stage_texts = [stage_texts]
            else:
                stage_texts = list(stage_texts)

            if stage_name not in self.stage_to_ordinal:
                print(f"Warning: '{stage_name}' not in predefined ordinal mapping. Adding dynamically.")
                next_ordinal = max(self.stage_to_ordinal.values(), default=-1) + 1
                self.stage_to_ordinal[stage_name] = next_ordinal
                self.ordinal_to_stage[next_ordinal] = stage_name

            # Use stage names directly as labels
            texts.extend(stage_texts)
            labels.extend([stage_name] * len(stage_texts))

            if verbose:
                print(f"Added {len(stage_texts)} examples for '{stage_name}'")

        if not texts:
            raise ValueError("train_data contains no training texts")

        # Create label encoder mapping (class ids follow alphabetical stage order)
        unique_labels = sorted(set(labels))
        self.label_encoder = {label: idx for idx, label in enumerate(unique_labels)}
        self.inverse_encoder = {idx: label for label, idx in self.label_encoder.items()}

        # Convert labels to numerical values
        y = np.array([self.label_encoder[label] for label in labels])

        if verbose:
            print(f"Total training examples: {len(texts)}")
            print("Label encoding:", self.label_encoder)
            print("Generating embeddings...")

        # Generate embeddings
        embeddings = self.embedder.encode(texts, show_progress_bar=verbose)

        if verbose:
            print(f"Embedding shape: {embeddings.shape}")
            print("Training classifier...")

        # Train classifier
        self.classifier.fit(embeddings, y)
        self.is_trained = True

        if verbose:
            print("Training completed!")

        return self

    def predict(self, texts, return_embeddings=False, return_ordinal=False):
        """
        Predict TTM stages for the given texts

        Args:
            texts: Single text string or list of texts
            return_embeddings: Whether to return embeddings as well
            return_ordinal: Whether to return ordinal scores

        Returns:
            For a single string, one dict with keys "text", "predicted_label",
            "confidence" and "probabilities" (plus "predicted_ordinal" /
            "ordinal_probabilities" and "embedding" when requested). For a
            list of texts, a list of such dicts; an empty list yields [].

        Raises:
            ValueError: If the model has not been trained yet.
        """
        if not self.is_trained:
            raise ValueError("Model must be trained before making predictions!")

        # Handle single text input
        single_input = isinstance(texts, str)
        texts = [texts] if single_input else list(texts)

        # Nothing to embed or classify; avoids handing an empty array to sklearn
        if not texts:
            return []

        # Generate embeddings
        embeddings = self.embedder.encode(texts, show_progress_bar=False)

        # Get predictions and probabilities
        label_indices = self.classifier.predict(embeddings)
        probabilities = self.classifier.predict_proba(embeddings)

        # predict_proba columns are ordered like classifier.classes_, so resolve
        # the stage name of every column once instead of assuming column == id
        column_stages = [self.inverse_encoder[int(class_id)]
                         for class_id in self.classifier.classes_]

        # Format results
        results = []
        for text, embedding, label_idx, probs in zip(texts, embeddings,
                                                     label_indices, probabilities):
            # Get stage name from encoded label
            predicted_stage = self.inverse_encoder[int(label_idx)]

            # Map probabilities to stage names
            prob_dict = {stage: float(prob) for stage, prob in zip(column_stages, probs)}

            result = {
                "text": text,
                "predicted_label": predicted_stage,
                # Confidence is the probability of the predicted class
                "confidence": prob_dict[predicted_stage],
                "probabilities": prob_dict
            }

            if return_ordinal:
                # -1 marks a stage that has no entry in the ordinal mapping
                result["predicted_ordinal"] = self.stage_to_ordinal.get(predicted_stage, -1)
                result["ordinal_probabilities"] = {
                    self.stage_to_ordinal.get(stage, -1): prob
                    for stage, prob in prob_dict.items()
                }

            if return_embeddings:
                result["embedding"] = embedding

            results.append(result)

        # Return single result if single input
        return results[0] if single_input else results

    def evaluate_similarity(self, text1, text2):
        """
        Calculate cosine similarity between two texts

        Args:
            text1, text2: Texts to compare

        Returns:
            Cosine similarity of the two embeddings as a float. Cosine
            similarity lies in [-1, 1]; higher means more similar.
        """
        embeddings = self.embedder.encode([text1, text2])
        similarity = cosine_similarity([embeddings[0]], [embeddings[1]])[0][0]
        return float(similarity)

    def get_ordinal_progression(self, text):
        """
        Get progression probabilities across all TTM stages in ordinal order

        Args:
            text: Input text to analyze (a single string)

        Returns:
            Dict with keys "text", "predicted_stage", "confidence",
            "ordinal_progression" (list of (ordinal, probability) sorted by
            ordinal) and "stage_progression" (the same list keyed by stage name).
        """
        result = self.predict(text, return_ordinal=True)

        # Get ordinal probabilities and sort
        ordinal_probs = result.get("ordinal_probabilities", {})
        sorted_stages = sorted(ordinal_probs.items())

        progression = {
            "text": text,
            "predicted_stage": result["predicted_label"],
            "confidence": result["confidence"],
            "ordinal_progression": sorted_stages,
            "stage_progression": [(self.ordinal_to_stage[ord_val], prob)
                                  for ord_val, prob in sorted_stages]
        }

        return progression

    def get_model_info(self):
        """Get information about the loaded model"""
        return {
            "sentence_model": str(self.embedder),
            "max_seq_length": self.embedder.max_seq_length,
            "embedding_dimension": self.embedder.get_sentence_embedding_dimension(),
            "classifier": str(self.classifier),
            "ordinal_mapping": self.stage_to_ordinal,
            "label_encoder": self.label_encoder,
            "is_trained": self.is_trained,
            # None until train() has built the label encoder
            "classes": list(self.label_encoder.keys()) if self.label_encoder else None
        }

# TTM Stages Training Dataset (with placeholder preparation stage).
# Maps each stage name to its example sentences; every sentence is listed once
# so that no single example is double-weighted during fitting.
train_dataset = {
    "precontemplation": [
        "I normally just deal with it or forget about it, but speaking to my close friends, they suggested i get some help to deal with it, as it is definitely affecting my relationships. ",
        "I am hoping to be offered counselling and to have some understanding from my course directors about why I may miss labs etc",
        "It started with a simple mental health assessment, where I wanted to know if I was experiencing a moment of negativity or a sign of depression.",
        "I have had tougher times in the past but was too stubborn to seek help. ",
        "Someone to tell me that it's all in my head and that I'm fine really or just someone to give me advice on how to deal with what I'm feeling I honestly don't know really I don't know if what's going on is normal or not",
        "because it feels like everything should be perfect in my life but it somehow isn't. ",
        "Been putting it off for many years as I thought I could just deal with it and was worried about bringing up issues from the past that I had forgotten.",
        "my mother believes it will help me to get counselling, and see if I have any mental problems.",
        "the therapist I was seeing on my year aboard told me I need to be accessing this service",
        "I have been recommended to have counselling to help",
        "they said I'm going through a slump. "
    ],
    "contemplation": [
        "I hope to be able to at least partially process what has happened and heal from it; see if I could benefit from talking to a sex therapist maybe",
        "It would really help to talk to someone about my uni and personal struggles",
        "I hope to understand how I can work to reduce my issues and this will hopefully help me with being more able to engage with my work.",
        "Some help to navigate everything going on at the moment and help managing my stress and help with my low mood and motivation",
        "I would like to find the source of my mental health problems, and learn of ways to help deal with them on a day to day basis.",
        "I need help and I do not know what to do.",
        "I don't know if I 100% needed therapy but I just wanted to see if it would help me feel better.",
        "because self-help resources haven't been enough and i feel like i need professional support to address certain issues.",
        "Some support or guidance on how to better deal with some of the emotions and thoughts that I am consistently having.",
        "I am looking for ways to get along with my family",
        "I am really keen to engage in some talking therapy to see what could help me. ",
        "I want to learn how to handle these situations mentioned above, and practise strategies into how I should approach these situations. Also, understand what triggers me.",
        "I'm hoping to just feel better and hopefully get my confidence back, I also just want a good nights sleep without lying for hours"
    ],
    "preparation": [
        "I'm preparing to start cognitive therapy next month",
        "I intend to begin addressing my anxiety after finals",
        "Getting ready to commit to weekly counseling sessions",
        "I plan to initiate therapy once I find the right provider",
        "Preparing myself to engage in trauma-focused treatment",
        "I'm arranging my schedule to accommodate regular therapy",
        "I'm preparing to start therapy next month",
        "I intend to address these issues soon",
        "Getting ready to make changes in the coming weeks",
        "I’m gathering information about support groups before I join one.",
        "I’ve started researching therapists so I can begin sessions soon.",
        "I’m planning out steps to reduce my stress after my current projects wrap up.",
        "I’m setting goals and making a list of changes I want to work on in the next month."
    ],
    "action": [
        "Speaking to my friends, family and academic tutor did help alleviate these feelings quite a bit",
        "Now, i chose to seek help because I think i should talk to someone about how i am feeling.",
        "i had a brief period of extreme anxiety that was affecting my sleep, and because of this i had sleeping pills prescribed to me.",
        "I have been trying for the last few months to implement things previous counsellors/supporters have suggested, but I don't think I can help myself any further without more professional input.",
        "I am working through this book right now that is hopefully going to help me change my attachment patterns in the long run",
        "I am currently getting referred for a formal ADHD diagnosis and support",
        "Integration of healthy coping stategies in my life",
        "decreasing the frequency at which I engage in less healthy ones (e.g. binge eating).",
        "Support BEYOND that which I am currently receiving, access to therapy through [ORG]",
        "I had some over the phone CBT sessions with the use of the [ORG] app",
        "I have been prescribe medication",
        "I have written them down in a notes app so it's easier for me to get across details and not forget things",
        "I've been talking to the wellbeing service"
    ],
    "maintenance": [
        "I have gotten myself out of tough times before and have a good support network, but I want the university to also be in the loop.",
        "I have experienced something similar a few years ago and I do not want to wait until it gets to that level of severity again.",
        "I've been struggling with anxiety for a few years but was able to cope relatively well.",
        "I have had help before primarily focused on anxiety and several techniques have helped me learn to control this",
        "I have been working on some habits and I have improved",
        "I found counselling so helpful but without it I feel so helpless and lost.",
        "improved outlook on life"
    ]
}


# Caveat printed before training: the preparation-stage examples are synthetic.
# Written line by line so the trailing spaces of the wrapped text stay visible.
PREPARATION_STAGE_NOTE = (
    "\n"
    "IMPORTANT LIMITATION: Preparation stage training data was synthetically constructed \n"
    "due to low representation in initial sample. Results for this stage should be \n"
    "interpreted cautiously and validated with real preparation-stage texts when available.\n"
)

# Announce the run, show the synthetic-data caveat, then build and fit the model
print(
    "=" * 70,
    "TRAINING TTM STAGES CLASSIFIER (TRANSFORMER EMBEDDINGS + LOGISTIC REGRESSION)",
    "=" * 70,
    sep="\n",
)
print(PREPARATION_STAGE_NOTE)

# train() returns the classifier itself, so construction and fitting can be chained
classifier = TTMClassifier().train(train_dataset)


# Test sentences; the trailing comment on each line is the expected stage.
# Every entry must end with a comma: adjacent string literals without one are
# silently concatenated into a single sentence.
test_sentences = [
    "help and assistance with how to deal with the issue",  # contemplation
    "hoping to have an outlet to talk about how I'm feeling and receive advice that will help me gain a different outlook on my circumstances and move forward in a positive way",  # contemplation
    "have been trying to access support for a while",  # action
    "help in accessing a diagnosis or support that will make it easier to cope",  # contemplation
    "i would like help",  # contemplation
    "how I can convince myself to work.",  # contemplation
    "hopefully feel better in myself.",  # contemplation
    "I think it would really help to speak to someone about it.",  # contemplation
    "I want to have someone to talk to about the way I feel without judgement or feeling like I'm burdening them.",  # contemplation
    "I am hoping to be able to go out independently & with friends and not feel so strange/anxious.",  # contemplation
    "I am hoping to overall improve my happiness.",  # contemplation
    "Try to get along with myself",  # contemplation
    "A chance to talk about the problems I am facing and to have measures put in place for my studies to help combat my mental health struggles",  # contemplation
    "I have also just started taking antidepressants.",  # action
    "my mental health fluctuates and I also think I might have OCD and don't know how to go about it",  # contemplation
]


def _print_banner(title):
    """Print a blank line, then the title framed by two 70-character rules."""
    rule = "=" * 70
    print("\n" + rule)
    print(title)
    print(rule)


# Per-sentence prediction with the probability of every stage
_print_banner("TESTING PREDICTIONS:")
for text in test_sentences:
    result = classifier.predict(text)
    report = [
        f"\nText: '{text}'",
        f"Predicted: {result['predicted_label']} (confidence: {result['confidence']:.3f})",
        "Stage Probabilities:",
    ]
    report.extend(
        f"  {stage}: {prob:.3f}" for stage, prob in result['probabilities'].items()
    )
    print("\n".join(report))

# Probabilities laid out in ordinal stage order (first two sentences only, for brevity)
_print_banner("ORDINAL PROGRESSION ANALYSIS:")
for text in test_sentences[:2]:
    progression = classifier.get_ordinal_progression(text)
    report = [
        f"\nText: '{text[:50]}...'",
        f"Predicted: {progression['predicted_stage']} (confidence: {progression['confidence']:.3f})",
        "Progression across stages:",
    ]
    report.extend(
        f"  {stage}: {prob:.3f}" for stage, prob in progression['stage_progression']
    )
    print("\n".join(report))

# Show model info
_print_banner("MODEL INFO:")
info = classifier.get_model_info()
for field_name, field_value in info.items():
    print(f"{field_name}: {field_value}")

# Number of training sentences available per stage
print("\nTraining data summary:")
for stage_name, stage_sentences in train_dataset.items():
    print(f"  {stage_name}: {len(stage_sentences)} sentences")