# 🤖 Mindneox.ai Chatbot - FREE GPU Version

**Complete conversational AI with memory**

⚡ 10x faster than Mac M1/M2  
💰 100% FREE on Google Colab  
🧠 Powered by Mistral-7B

---

## 📋 Instructions:
1. **Enable GPU**: Runtime → Change runtime type → GPU → Save
2. **Run all cells** in order (Ctrl+F9 or Runtime → Run all)
3. **Start chatting** when you see the interactive prompt!

---

## 🔥 Cell 1: Check GPU

In [None]:
import torch
# Cell 1: report whether PyTorch can see a CUDA device before anything heavy is loaded.
banner = "=" * 60
print(banner)
print("ü§ñ Mindneox.ai Chatbot - GPU Setup")
print(banner)

cuda_ready = torch.cuda.is_available()
print(f"\nCUDA Available: {cuda_ready}")

if not cuda_ready:
    # No accelerator attached: point the user at the Colab runtime setting.
    print("\n‚ùå Enable GPU: Runtime ‚Üí Change runtime type ‚Üí GPU ‚Üí Save")
else:
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    # total_memory is in bytes; convert to GiB for display.
    total_gib = torch.cuda.get_device_properties(0).total_memory / 1024**3
    print(f"Memory: {total_gib:.2f} GB")
    print("\nüéâ FREE GPU READY FOR CHATBOT!")

## 📦 Cell 2: Install Packages (3 minutes)

In [None]:
print("üì¶ Installing chatbot dependencies...")
print("‚è±Ô∏è  Takes about 3 minutes\n")

!pip install -q llama-cpp-python langchain langchain-core langchain-community
!pip install -q sentence-transformers transformers accelerate
!pip install -q pinecone-client redis

print("\n‚úÖ All packages installed!")
print("‚úÖ Ready to build chatbot with Pinecone!")

## 📥 Cell 3: Download Mistral-7B Model (3 minutes)

In [None]:
import os
MODEL_FILE = "Mistral-7B-Instruct-v0.3.Q4_K_M.gguf"
MODEL_URL = "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.3-GGUF/resolve/main/Mistral-7B-Instruct-v0.3.Q4_K_M.gguf"

if not os.path.exists(MODEL_FILE):
    print(f"üì• Downloading chatbot brain (4.37 GB)...")
    print("‚è±Ô∏è  Takes about 2-3 minutes\n")
    !wget --show-progress {MODEL_URL}
    
    if os.path.exists(MODEL_FILE):
        size = os.path.getsize(MODEL_FILE) / 1024**3
        print(f"\n‚úÖ Downloaded! Size: {size:.2f} GB")
    else:
        print("\n‚ùå Download failed. Try: Runtime ‚Üí Restart runtime")
else:
    size = os.path.getsize(MODEL_FILE) / 1024**3
    print(f"‚úÖ Chatbot brain ready! Size: {size:.2f} GB")

In [None]:
from pinecone import Pinecone, ServerlessSpec
import os

# Cell 4: connect to Pinecone and expose `index` / `pinecone_enabled` to later cells.
#
# SECURITY: an API key was previously hard-coded in this cell in plain text.
# Treat that key as compromised and revoke it in the Pinecone console.
# The key is now taken from the PINECONE_API_KEY environment variable, with a
# hidden interactive prompt as a fallback, so it never lands in the notebook.
from getpass import getpass

PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY", "").strip()
INDEX_NAME = "mindnex-responses"

print("=" * 60)
print("üóÑÔ∏è  Connecting to Pinecone Cloud Database")
print("=" * 60)

try:
    if not PINECONE_API_KEY:
        PINECONE_API_KEY = getpass("Pinecone API key (leave blank to skip): ").strip()
    if not PINECONE_API_KEY:
        # Handled by the except below: the chatbot then runs without storage.
        raise ValueError("no Pinecone API key provided")

    # Initialize Pinecone
    pc = Pinecone(api_key=PINECONE_API_KEY)

    # Check if index exists
    existing_indexes = [existing.name for existing in pc.list_indexes()]

    if INDEX_NAME in existing_indexes:
        index = pc.Index(INDEX_NAME)
        stats = index.describe_index_stats()
        vector_count = stats.get('total_vector_count', 0)
        print(f"\n‚úÖ Connected to Pinecone!")
        print(f"‚úÖ Index: {INDEX_NAME}")
        print(f"‚úÖ Vectors stored: {vector_count}")
        print(f"‚úÖ Ready to collect conversation data!")
    else:
        print(f"\n‚ö†Ô∏è  Index '{INDEX_NAME}' not found")
        print("Creating new index...")
        # dimension must equal the length of the vectors upserted into this index.
        pc.create_index(
            name=INDEX_NAME,
            dimension=384,
            metric='cosine',
            spec=ServerlessSpec(cloud='aws', region='us-east-1')
        )
        index = pc.Index(INDEX_NAME)
        print(f"‚úÖ Created new index: {INDEX_NAME}")

    pinecone_enabled = True
    print("\n‚úÖ Pinecone ready to store all conversations!")

except Exception as e:
    # Best-effort: storage is optional, so any failure only disables it.
    print(f"\n‚ùå Pinecone connection failed: {e}")
    print("‚ö†Ô∏è  Chatbot will work but won't save to database")
    pinecone_enabled = False
    index = None

print("=" * 60)

## 🗄️ Cell 4: Connect to Pinecone (Data Storage)

## 🧠 Cell 5: Load AI Model on GPU

In [None]:
from llama_cpp import Llama
from langchain_community.llms import LlamaCpp
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage, AIMessage
from datetime import datetime

# Cell 5: build the LangChain LlamaCpp wrapper around the downloaded GGUF file.
banner = "=" * 60
print(banner)
print("üß† Loading Chatbot AI on FREE GPU")
print(banner)

# All generation settings in one place; passed to LlamaCpp as keyword arguments.
llama_settings = {
    "model_path": "Mistral-7B-Instruct-v0.3.Q4_K_M.gguf",
    "n_ctx": 8192,            # large context window for long conversations
    "n_threads": 2,
    "n_gpu_layers": -1,       # request that every layer be offloaded to the GPU
    "n_batch": 512,
    "temperature": 0.8,       # slightly more creative output for chat
    "top_p": 0.95,
    "repeat_penalty": 1.2,
    "max_tokens": 500,
    "verbose": False,
}
llm = LlamaCpp(**llama_settings)

print("\n‚úÖ Chatbot AI loaded on GPU!")
print("‚úÖ Ready for conversations!")
print(banner)

## 🤖 Cell 6: Create Chatbot with Memory + Pinecone

In [None]:
from sentence_transformers import SentenceTransformer
import hashlib

# Load the sentence-embedding model used to turn each chat exchange into a
# vector before it is upserted into Pinecone.
# NOTE(review): the Pinecone index in this notebook is created with
# dimension=384; presumably that matches this model's output size — confirm
# before swapping in a different embedding model.
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

class MindneoxChatbot:
    """Chat wrapper around an LLM with in-memory history and optional Pinecone logging.

    Each call to :meth:`chat` builds a prompt from the most recent exchanges,
    asks the LLM for a reply, records the exchange in ``conversation_history``
    and, when a Pinecone index was supplied, upserts an embedding of the
    exchange into that index.

    Args:
        llm: Object exposing ``invoke(prompt)`` that returns the reply text.
        pinecone_index: Object exposing ``upsert(vectors=...)``; ``None``
            disables storage entirely.
        embedder: Object exposing ``encode(text)`` whose result has
            ``tolist()``. When ``None``, the notebook-level
            ``embedding_model`` is looked up at storage time.
        max_history: Number of most recent exchanges replayed in the prompt.
    """

    # Label the model sometimes echoes at the start of its reply.
    _ASSISTANT_PREFIX = "Assistant:"

    def __init__(self, llm, pinecone_index=None, embedder=None, max_history=5):
        self.llm = llm
        self.conversation_history = []
        self.start_time = datetime.now()
        self.pinecone_index = pinecone_index
        self.embedder = embedder
        self.max_history = max_history
        self.vectors_stored = 0

    def chat(self, user_message: str) -> str:
        """Send one user message and return the assistant's reply.

        Generation failures are not raised: the method returns a string of
        the form ``"Error: <message>"`` and leaves the history untouched, so
        an interactive loop keeps running.
        """
        full_prompt = f"{self._build_context()}\n\nUser: {user_message}\nAssistant:"

        try:
            response = self.llm.invoke(full_prompt).strip()
            # Drop an echoed role label so it is not stored or displayed.
            if response.startswith(self._ASSISTANT_PREFIX):
                response = response[len(self._ASSISTANT_PREFIX):].strip()
        except Exception as e:  # boundary of the chat loop: report, don't crash
            return f"Error: {str(e)}"

        self.conversation_history.append({
            'user': user_message,
            'assistant': response,
            'timestamp': datetime.now().isoformat()
        })

        # Storage is governed solely by the index given to this instance, so
        # the class no longer depends on a notebook-level flag being defined.
        if self.pinecone_index is not None:
            self._store_in_pinecone(user_message, response)

        return response

    def _store_in_pinecone(self, user_msg: str, bot_response: str) -> bool:
        """Embed one exchange and upsert it into the Pinecone index.

        Best-effort: any failure is printed and swallowed so chatting
        continues. Returns ``True`` when the vector was stored, else ``False``.
        """
        try:
            import uuid  # local import: only needed when storage is active

            now = datetime.now()
            # A random suffix keeps IDs unique even when the same message is
            # sent twice within one second (an upsert would otherwise overwrite).
            unique_id = f"chat_{now.strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"

            embedder = self.embedder if self.embedder is not None else embedding_model
            combined_text = f"User: {user_msg}\nAssistant: {bot_response}"
            embedding = embedder.encode(combined_text).tolist()

            self.pinecone_index.upsert(vectors=[{
                'id': unique_id,
                'values': embedding,
                'metadata': {
                    'user_message': user_msg,
                    'bot_response': bot_response,
                    'timestamp': now.isoformat(),
                    'source': 'google_colab_chat'
                }
            }])

            self.vectors_stored += 1
            return True

        except Exception as e:
            print(f"\n‚ö†Ô∏è  Pinecone storage failed: {e}")
            return False

    def _build_context(self) -> str:
        """Return the system instruction plus the most recent exchanges.

        NOTE(review): the current user turn is appended by ``chat`` after the
        closing ``[/INST]`` tag — confirm this matches the prompt template the
        loaded model expects.
        """
        # A bare [-0:] slice would return the whole list, so zero (or a
        # negative value) is handled explicitly as "no history".
        if self.max_history > 0:
            recent = self.conversation_history[-self.max_history:]
        else:
            recent = []

        parts = ["[INST] You are Mindneox.ai, a helpful AI assistant. You have conversations with users and remember previous messages.\n\n"]
        parts.extend(
            f"User: {msg['user']}\nAssistant: {msg['assistant']}\n\n"
            for msg in recent
        )
        parts.append("[/INST]")
        return "".join(parts)

    def get_history(self) -> list:
        """Return the list of recorded exchanges (the live list, not a copy)."""
        return self.conversation_history

    def clear_history(self):
        """Forget every recorded exchange."""
        self.conversation_history = []
        print("‚úÖ Conversation history cleared")

    def get_stats(self) -> dict:
        """Return message count, vectors stored, session duration and message rate."""
        # Read the clock once so duration and rate describe the same instant.
        elapsed = datetime.now() - self.start_time
        total = len(self.conversation_history)
        return {
            'total_messages': total,
            'vectors_stored': self.vectors_stored,
            # str(timedelta) may include microseconds; keep only H:MM:SS.
            'session_duration': str(elapsed).split('.')[0],
            # Sessions shorter than a minute are rated as if one minute elapsed.
            'messages_per_minute': total / max(1, elapsed.total_seconds() / 60)
        }

# Give the bot the Pinecone index only when the connection cell succeeded;
# otherwise it runs with in-memory history alone.
if pinecone_enabled:
    active_index = index
    storage_notice = "‚úÖ Pinecone storage ENABLED - All chats will be saved!"
else:
    active_index = None
    storage_notice = "‚ö†Ô∏è  Pinecone storage DISABLED"

chatbot = MindneoxChatbot(llm, pinecone_index=active_index)

print("‚úÖ Chatbot initialized with memory!")
print(storage_notice)
print("‚úÖ Ready to chat!")

## 💬 Cell 7: Interactive Chat Interface

**Commands:**
- Type your message to chat
- `history` - View conversation
- `stats` - See statistics (including Pinecone storage)
- `clear` - Clear history
- `quit` - Exit chat

**✅ Every message is automatically saved to Pinecone (when the Pinecone connection is enabled)!**

In [None]:
# Cell 7: read-eval-print loop. Recognised commands are `quit`, `history`,
# `stats` and `clear`; anything else is sent to the chatbot.
print("=" * 80)
print("üí¨ MINDNEOX.AI CHATBOT - Interactive Mode")
print("=" * 80)
print("\nü§ñ Hi! I'm Mindneox.ai, your AI assistant powered by FREE GPU!")
if pinecone_enabled:
    print("‚úÖ Pinecone enabled - All conversations will be saved!")
print("\nüìù Commands:")
print("   ‚Ä¢ Type your message to chat")
print("   ‚Ä¢ Type 'history' to see conversation")
print("   ‚Ä¢ Type 'stats' to see statistics")
print("   ‚Ä¢ Type 'clear' to clear history")
print("   ‚Ä¢ Type 'quit' to exit")
print("\n" + "=" * 80)

while True:
    # Get user input; an interrupted or closed prompt ends the session cleanly
    # with the usual summary instead of a traceback.
    try:
        user_input = input("\nüòä You: ").strip()
    except (EOFError, KeyboardInterrupt):
        user_input = 'quit'

    if not user_input:
        continue

    command = user_input.lower()

    if command == 'quit':
        stats = chatbot.get_stats()
        print("\n" + "=" * 80)
        print("? Session Summary:")
        print(f"   Total messages: {stats['total_messages']}")
        if pinecone_enabled:
            print(f"   ‚úÖ Saved to Pinecone: {stats['vectors_stored']} conversations")
        print(f"   Duration: {stats['session_duration']}")
        print("=" * 80)
        print("\n?üëã Thanks for chatting! Goodbye!")
        break

    elif command == 'history':
        history = chatbot.get_history()
        if history:
            print("\nüìú Conversation History:")
            print("=" * 80)
            for i, msg in enumerate(history, 1):
                reply = msg['assistant']
                # Add the ellipsis only when the reply was actually truncated.
                preview = reply if len(reply) <= 100 else reply[:100] + "..."
                print(f"\n{i}. You: {msg['user']}")
                print(f"   Bot: {preview}")
            print("=" * 80)
        else:
            print("\nüìú No conversation history yet")
        continue

    elif command == 'stats':
        stats = chatbot.get_stats()
        print("\nüìä Chatbot Statistics:")
        print("=" * 80)
        print(f"   Messages: {stats['total_messages']}")
        if pinecone_enabled:
            print(f"   ‚úÖ Stored in Pinecone: {stats['vectors_stored']}")
        print(f"   Duration: {stats['session_duration']}")
        print(f"   Rate: {stats['messages_per_minute']:.1f} msg/min")
        print("=" * 80)
        continue

    elif command == 'clear':
        chatbot.clear_history()
        continue

    # Generate response
    print("\nü§ñ Mindneox.ai: ", end="", flush=True)

    stored_before = chatbot.vectors_stored
    start = datetime.now()
    response = chatbot.chat(user_input)
    duration = (datetime.now() - start).total_seconds()

    print(response)
    print(f"\n‚ö° Response time: {duration:.2f}s")
    # Claim success only when this turn really added a vector; a failed
    # generation or a failed upsert leaves the counter unchanged.
    if pinecone_enabled and chatbot.vectors_stored > stored_before:
        print(f"‚úÖ Saved to Pinecone!")

## 🧪 Cell 8: Quick Test (Optional)

Run this instead of Cell 7 if you want a quick test without interactive mode

In [None]:
print("üß™ Quick Test\n")

test_questions = [
    "Hi! What can you help me with?",
    "Tell me about yourself",
    "What's machine learning?",
]

for question in test_questions:
    print(f"üòä User: {question}")
    response = chatbot.chat(question)
    print(f"ü§ñ Bot: {response}\n")
    print("-" * 80 + "\n")

stats = chatbot.get_stats()
print(f"üìä Stats: {stats['total_messages']} messages in {stats['session_duration']}")

## 💾 Cell 9: Export Conversation

In [None]:
def export_conversation(bot=None, output_dir=None):
    """Write the chat history to a timestamped UTF-8 text file.

    Args:
        bot: Object exposing ``get_history()`` and ``get_stats()``. Defaults
            to the notebook-level ``chatbot``.
        output_dir: Directory to write into. ``None`` writes to the current
            working directory, as before.

    Returns:
        The path of the written file, or ``None`` when there is no history.
    """
    import os  # local import so this cell does not rely on an earlier one

    if bot is None:
        bot = chatbot

    history = bot.get_history()

    if not history:
        print("No conversation to export")
        return None

    filename = f"mindneox_chat_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt"
    if output_dir is not None:
        filename = os.path.join(output_dir, filename)

    rule = "=" * 80
    lines = [rule + "\n", "MINDNEOX.AI CONVERSATION EXPORT\n", rule + "\n\n"]

    for i, msg in enumerate(history, 1):
        lines.append(f"Message {i}\n")
        lines.append(f"Time: {msg['timestamp']}\n")
        lines.append(f"User: {msg['user']}\n")
        lines.append(f"Bot: {msg['assistant']}\n")
        lines.append("\n" + "-" * 80 + "\n\n")

    stats = bot.get_stats()
    lines.append("\nStatistics:\n")
    lines.append(f"Total Messages: {stats['total_messages']}\n")
    lines.append(f"Duration: {stats['session_duration']}\n")

    # Explicit UTF-8: chat text routinely contains emoji and other non-ASCII
    # characters, which the platform default encoding may not handle.
    with open(filename, 'w', encoding='utf-8') as f:
        f.writelines(lines)

    print(f"‚úÖ Conversation exported to: {filename}")
    return filename

# Export the current session now. The function prints the name of the file it
# wrote, or "No conversation to export" when the history is empty.
export_conversation()

## 📊 Cell 10: Performance Stats + Pinecone Check

In [None]:
# Cell 10: summary report covering the chatbot session, Pinecone and the GPU.
rule = "=" * 80
print(rule)
print("üìä CHATBOT PERFORMANCE STATS")
print(rule)

# Chatbot section
stats = chatbot.get_stats()
print(f"\nü§ñ Chatbot:")
print(f"   Messages: {stats['total_messages']}")
print(f"   Duration: {stats['session_duration']}")
print(f"   Rate: {stats['messages_per_minute']:.1f} msg/min")

# Pinecone section: the heading is printed whether or not a connection exists.
print(f"\nüóÑÔ∏è  Pinecone Database:")
if not (pinecone_enabled and index):
    print(f"   ‚ùå Not connected")
else:
    try:
        index_stats = index.describe_index_stats()
        total_vectors = index_stats.get('total_vector_count', 0)
        print(f"   ‚úÖ Connected: YES")
        print(f"   Total vectors in database: {total_vectors}")
        print(f"   Stored this session: {stats['vectors_stored']}")
        print(f"   Index: {INDEX_NAME}")
    except Exception as e:
        print(f"   ‚ö†Ô∏è  Error getting stats: {e}")

# GPU section
# NOTE(review): memory_allocated / memory_reserved report PyTorch's own
# allocator; memory held by other libraries is presumably not included — confirm.
if torch.cuda.is_available():
    bytes_per_gib = 1024**3
    print(f"\nüî• GPU:")
    print(f"   Name: {torch.cuda.get_device_name(0)}")
    total_vram = torch.cuda.get_device_properties(0).total_memory / bytes_per_gib
    print(f"   Total VRAM: {total_vram:.2f} GB")
    used_vram = torch.cuda.memory_allocated(0) / bytes_per_gib
    print(f"   Used VRAM: {used_vram:.2f} GB")
    cached_vram = torch.cuda.memory_reserved(0) / bytes_per_gib
    print(f"   Cached: {cached_vram:.2f} GB")

    # Static marketing figures; nothing below is measured at runtime.
    for line in (
        "\nüí° Performance:",
        "   Platform: Google Colab FREE",
        "   Speed: 40-60 tokens/sec",
        "   Cost: $0 (FREE!)",
        "   vs Mac: 10x faster",
    ):
        print(line)

print(rule)