In [1]:
import pandas as pd
import pymongo
from datetime import datetime
import os
from dotenv import load_dotenv

# Load environment variables (python-dotenv; presumably from a local .env
# file) at import time, so the os.getenv('MONGODB_URI') / os.getenv('MONGODB_DB')
# lookups in the functions below can pick up values defined there.
load_dotenv()

def _print_price_curve_summary(df):
    """Print distinct profiles/types/states and the date span of the uploaded frame."""
    print("\nData summary:")
    profiles = df['profile'].unique()
    types = df['type'].unique()
    states = df['state'].unique()
    date_range = f"{df['date'].min().strftime('%Y-%m-%d')} to {df['date'].max().strftime('%Y-%m-%d')}"

    print(f"Profiles: {profiles}")
    print(f"Types: {types}")
    print(f"States: {states}")
    print(f"Date range: {date_range}")
    print(f"Total combinations: {len(profiles)} profiles × {len(types)} types × {len(states)} states = {len(profiles) * len(types) * len(states)}")


def upload_price_curves_to_mongodb(csv_file_path='merchant_price_monthly.csv',
                                   curve_name="Aurora Jan 2025"):
    """
    Upload price curve data from CSV to MongoDB.

    Reads the CSV, derives ``date``/``year``/``month``/``month_name`` from its
    ``time`` column (expected as ``dd/mm/YYYY``), tags every row with the
    curve name and upload timestamps, then replaces all documents of that
    curve in the ``price_curves`` collection with the new rows.

    The connection string and database name are read from the ``MONGODB_URI``
    and ``MONGODB_DB`` environment variables (defaults:
    ``mongodb://localhost:27017/`` and ``energy_contracts``).

    Args:
        csv_file_path: Path of the CSV file to load.
        curve_name: Value stored in the ``curve`` field; existing documents
            with the same value are deleted before the insert.

    Returns:
        None. Progress and errors are reported via ``print``; no exception
        is propagated to the caller.
    """
    # Local import: the module header only imports `datetime` itself.
    from datetime import timezone

    # MongoDB connection
    client = None
    try:
        MONGODB_URI = os.getenv('MONGODB_URI', 'mongodb://localhost:27017/')
        MONGODB_DB = os.getenv('MONGODB_DB', 'energy_contracts')

        client = pymongo.MongoClient(MONGODB_URI)
        db = client[MONGODB_DB]
        collection = db['price_curves']

        print(f"Connected to MongoDB: {MONGODB_DB}")

    except Exception as e:
        print(f"Error connecting to MongoDB: {e}")
        if client is not None:
            client.close()
        return

    # Everything below runs with an open client, so a single outer
    # try/finally guarantees it is closed on every exit path (including the
    # early returns for CSV/processing errors).
    try:
        # Read the CSV file
        try:
            df = pd.read_csv(csv_file_path)

            print(f"Loaded CSV with {len(df)} records")
            print(f"Columns: {list(df.columns)}")

        except Exception as e:
            print(f"Error reading CSV file: {e}")
            return

        # Bail out before touching the database: insert_many rejects an empty
        # list, and by then the existing curve would already have been deleted.
        if df.empty:
            print("CSV file contains no records; nothing to upload")
            return

        # Process the data
        try:
            # Convert time column to datetime for better handling
            df['date'] = pd.to_datetime(df['time'], format='%d/%m/%Y')
            if df['date'].isna().any():
                # Missing 'time' values become NaT; reject them here, before
                # any existing data is deleted.
                raise ValueError("'time' column contains missing values")
            df['year'] = df['date'].dt.year
            df['month'] = df['date'].dt.month
            df['month_name'] = df['date'].dt.strftime('%B')

            # One timezone-aware timestamp for both fields: PyMongo treats
            # naive datetimes as UTC, so a naive local time would be stored
            # shifted by the local UTC offset.
            upload_time = datetime.now(timezone.utc)
            df['curve'] = curve_name
            df['uploadedAt'] = upload_time
            df['updatedAt'] = upload_time

            print("Data processing complete. Sample record:")
            print(df.head(1).to_dict('records')[0])

        except Exception as e:
            print(f"Error processing data: {e}")
            return

        # Upload to MongoDB
        try:
            # Convert DataFrame to list of dictionaries
            records = df.to_dict('records')

            # Clear existing data for this curve if it exists.
            # NOTE: delete + insert is not atomic; a failed insert leaves the
            # curve empty until the script is re-run.
            existing_count = collection.count_documents({'curve': curve_name})
            if existing_count > 0:
                print(f"Found {existing_count} existing records for curve '{curve_name}'. Deleting...")
                collection.delete_many({'curve': curve_name})

            # Insert new data
            result = collection.insert_many(records)

            print(f"Successfully uploaded {len(result.inserted_ids)} records to MongoDB")
            print(f"Curve name: {curve_name}")

            # Verify upload
            total_records = collection.count_documents({'curve': curve_name})
            print(f"Verification: {total_records} records found in database for this curve")

        except Exception as e:
            print(f"Error uploading to MongoDB: {e}")
            return

        # The summary is informational only; a missing column here must not
        # be reported as an upload failure.
        try:
            _print_price_curve_summary(df)
        except Exception as e:
            print(f"Error building data summary: {e}")

    finally:
        client.close()
        print("MongoDB connection closed")

def test_mongodb_connection():
    """
    Test MongoDB connection before uploading.

    Pings the server configured via the ``MONGODB_URI`` environment variable
    (default ``mongodb://localhost:27017/``), prints the available databases
    and, if the database named by ``MONGODB_DB`` (default
    ``energy_contracts``) exists, its collections.

    Returns:
        bool: True if the ping and listing succeeded, False if any step
        raised (the error is printed, not propagated).
    """
    client = None
    try:
        MONGODB_URI = os.getenv('MONGODB_URI', 'mongodb://localhost:27017/')
        MONGODB_DB = os.getenv('MONGODB_DB', 'energy_contracts')

        client = pymongo.MongoClient(MONGODB_URI)

        # Creating the client does not prove the server is reachable;
        # the ping forces a round trip.
        client.admin.command('ping')
        print("MongoDB connection successful!")

        # List databases
        databases = client.list_database_names()
        print(f"Available databases: {databases}")

        # Check if our database exists
        if MONGODB_DB in databases:
            db = client[MONGODB_DB]
            collections = db.list_collection_names()
            print(f"Collections in {MONGODB_DB}: {collections}")
        else:
            print(f"Database {MONGODB_DB} does not exist yet (will be created)")

        return True

    except Exception as e:
        print(f"MongoDB connection failed: {e}")
        return False

    finally:
        # Close on both success and failure so a failed ping does not leak
        # the client.
        if client is not None:
            client.close()

if __name__ == "__main__":
    # Script entry point: the upload only runs when the connectivity check passes.
    print("=== Price Curve Upload Script ===")
    print("1. Testing MongoDB connection...")

    connection_ok = test_mongodb_connection()
    if not connection_ok:
        print("Please check your MongoDB connection settings and try again.")
    else:
        print("\n2. Uploading price curve data...")
        upload_price_curves_to_mongodb()

    # Printed regardless of whether the upload was attempted.
    print("\n=== Script completed ===")

ModuleNotFoundError: No module named 'dotenv'