In [null]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [None]:
class CloudsOfThoughts(nn.Module):
    """Attend over a bank of knowledge vectors, then refine the result.

    ``forward`` runs four stages:

    1. a linear projection of the input,
    2. multi-head cross-attention in which the projected input is the query
       and the knowledge bank supplies both keys and values,
    3. ``reasoning_steps`` applications of one shared two-layer MLP,
    4. a final linear projection.

    Args:
        embedding_dim: Size of the last dimension of inputs, knowledge
            vectors and outputs. ``nn.MultiheadAttention`` requires it to be
            divisible by ``num_heads``.
        hidden_dim: Width of the hidden layer inside the refinement MLP.
        knowledge_dim: Number of vectors in the learned internal knowledge
            bank.
        reasoning_steps: How many times the refinement MLP is applied.
        num_heads: Number of attention heads (default 8, the value that was
            previously hard-coded).
    """

    def __init__(self, embedding_dim, hidden_dim, knowledge_dim,
                 reasoning_steps=3, num_heads=8):
        super().__init__()
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.reasoning_steps = reasoning_steps
        self.knowledge_dim = knowledge_dim
        self.num_heads = num_heads

        # NOTE: submodules are created in a fixed order so that seeded
        # initialisation stays reproducible.

        # Input understanding: a linear map, since inputs are continuous
        # vectors rather than token ids.
        self.input_understanding = nn.Linear(embedding_dim, embedding_dim)

        # Learned internal knowledge bank: (knowledge_dim, embedding_dim).
        self.knowledge_base = nn.Parameter(torch.randn(knowledge_dim, embedding_dim))

        # Contextual matching: cross-attention (sequence-first layout).
        self.attention_layer = nn.MultiheadAttention(
            embed_dim=embedding_dim, num_heads=num_heads
        )

        # Refinement: one MLP whose weights are shared by every step.
        self.refinement_layer = nn.Sequential(
            nn.Linear(embedding_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, embedding_dim)
        )

        # Output generation: final projection.
        self.output_layer = nn.Linear(embedding_dim, embedding_dim)

    def forward(self, input_tensor, knowledge_tensor=None):
        """Run the four-stage pipeline on a batch.

        Args:
            input_tensor: Tensor of shape [batch_size, seq_len, embedding_dim].
            knowledge_tensor: Optional external knowledge of shape
                [batch_size, num_entries, embedding_dim]. When given it is
                used as attention keys/values instead of the internal
                knowledge bank; when ``None`` the internal bank is used.

        Returns:
            Tensor of shape [batch_size, seq_len, embedding_dim].

        Raises:
            ValueError: If ``knowledge_tensor`` is not 3-D, or its batch or
                embedding dimension does not match the model/input.
        """
        batch_size = input_tensor.size(0)

        if knowledge_tensor is None:
            # Broadcast the shared bank across the batch as a view; expand
            # avoids materialising batch_size copies like repeat would.
            memory = self.knowledge_base.unsqueeze(1).expand(-1, batch_size, -1)
        else:
            if knowledge_tensor.dim() != 3:
                raise ValueError(
                    "knowledge_tensor must have shape "
                    "[batch_size, num_entries, embedding_dim], got "
                    f"{tuple(knowledge_tensor.shape)}"
                )
            if (knowledge_tensor.size(0) != batch_size
                    or knowledge_tensor.size(2) != self.embedding_dim):
                raise ValueError(
                    "knowledge_tensor shape "
                    f"{tuple(knowledge_tensor.shape)} is incompatible with "
                    f"batch_size={batch_size} and "
                    f"embedding_dim={self.embedding_dim}"
                )
            # The attention layer is sequence-first: (entries, batch, dim).
            memory = knowledge_tensor.transpose(0, 1)

        # Step 1: Input understanding.
        understood_input = self.input_understanding(input_tensor)

        # Step 2: Contextual matching. Move to (seq_len, batch, dim) for the
        # sequence-first attention layer, then back to batch-first.
        query = understood_input.transpose(0, 1)
        attention_output, _ = self.attention_layer(query, memory, memory)
        attention_output = attention_output.transpose(0, 1)

        # Step 3: Iterative refinement with the shared MLP.
        refined_output = attention_output
        for _ in range(self.reasoning_steps):
            refined_output = self.refinement_layer(refined_output)

        # Step 4: Output generation.
        return self.output_layer(refined_output)

In [None]:
if __name__ == "__main__":
    # Demo: build a small model, push one random batch through it and
    # report the tensor shapes on either side.

    # Batch geometry.
    batch_size, seq_len = 2, 5

    # Model hyper-parameters.
    embedding_dim, hidden_dim = 64, 128
    knowledge_dim = 100
    reasoning_steps = 3

    # Instantiate the network (constructed before the input is sampled).
    model = CloudsOfThoughts(
        embedding_dim,
        hidden_dim,
        knowledge_dim,
        reasoning_steps=reasoning_steps,
    )

    # Random vectors stand in for word embeddings.
    input_tensor = torch.randn((batch_size, seq_len, embedding_dim))

    # Single forward pass using the model's internal knowledge bank.
    output_tensor = model(input_tensor)

    for label, tensor in (
        ("Input shape:", input_tensor),
        ("Output shape:", output_tensor),
    ):
        print(label, tensor.shape)