In [2]:
import pandas as pd
import pymongo
from datetime import datetime
import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

def _add_curve_metadata(df, curve_name):
    """Add parsed date parts, the curve name and upload timestamps to df in place."""
    # The 'time' column is parsed as day/month/year (e.g. '1/07/2024').
    df['date'] = pd.to_datetime(df['time'], format='%d/%m/%Y')
    df['year'] = df['date'].dt.year
    df['month'] = df['date'].dt.month
    df['month_name'] = df['date'].dt.strftime('%B')

    # One shared timestamp so uploadedAt and updatedAt are identical on insert.
    stamp = datetime.now()
    df['curve'] = curve_name
    df['uploadedAt'] = stamp
    df['updatedAt'] = stamp
    return df


def _replace_curve_documents(collection, curve_name, df):
    """Delete any stored documents for curve_name, insert df's rows, and verify the count."""
    existing_count = collection.count_documents({'curve': curve_name})
    if existing_count > 0:
        print(f"Found {existing_count} existing records for curve '{curve_name}'. Deleting...")
        collection.delete_many({'curve': curve_name})

    result = collection.insert_many(df.to_dict('records'))

    print(f"Successfully uploaded {len(result.inserted_ids)} records to MongoDB")
    print(f"Curve name: {curve_name}")

    total_records = collection.count_documents({'curve': curve_name})
    print(f"Verification: {total_records} records found in database for this curve")


def _print_curve_summary(df):
    """Print the distinct profiles, types, states and the date range found in df."""
    print("\nData summary:")
    profiles = df['profile'].unique()
    types = df['type'].unique()
    states = df['state'].unique()
    date_range = f"{df['date'].min().strftime('%Y-%m-%d')} to {df['date'].max().strftime('%Y-%m-%d')}"

    print(f"Profiles: {profiles}")
    print(f"Types: {types}")
    print(f"States: {states}")
    print(f"Date range: {date_range}")
    print(f"Total combinations: {len(profiles)} profiles × {len(types)} types × {len(states)} states = {len(profiles) * len(types) * len(states)}")


def upload_price_curves_to_mongodb(csv_file_path='merchant_price_monthly.csv',
                                   curve_name="Aurora Jan 2025"):
    """
    Upload price curve data from CSV to MongoDB

    Reads the CSV, derives 'date', 'year', 'month' and 'month_name' from the
    'time' column, tags every row with the curve name and upload timestamps,
    then replaces the documents stored for that curve in the 'price_curves'
    collection and prints a short summary.

    Connection settings are taken from the environment: MONGODB_URI is
    required (no credential is embedded in the code), MONGODB_DB defaults to
    'energy_contracts'.

    Args:
        csv_file_path: Path of the CSV file to load.
        curve_name: Value stored in the 'curve' field; existing documents with
            the same value are deleted before the new rows are inserted.

    Returns:
        None. Progress and failures are reported on stdout; the function
        returns early after printing an error instead of raising.
    """
    mongodb_uri = os.getenv('MONGODB_URI')
    mongodb_db = os.getenv('MONGODB_DB', 'energy_contracts')
    if not mongodb_uri:
        print("Error connecting to MongoDB: MONGODB_URI is not set "
              "(define it in your environment or .env file)")
        return

    client = None
    try:
        # MongoDB connection
        try:
            client = pymongo.MongoClient(mongodb_uri)
            # MongoClient connects lazily; ping so a bad URI/credential fails here.
            client.admin.command('ping')
            collection = client[mongodb_db]['price_curves']
            print(f"Connected to MongoDB: {mongodb_db}")
        except Exception as e:
            print(f"Error connecting to MongoDB: {e}")
            return

        # Read the CSV file
        try:
            df = pd.read_csv(csv_file_path)
            print(f"Loaded CSV with {len(df)} records")
            print(f"Columns: {list(df.columns)}")
        except Exception as e:
            print(f"Error reading CSV file: {e}")
            return

        if df.empty:
            # Nothing to insert; leave any existing curve data untouched.
            print("Error processing data: CSV file contains no records")
            return

        # Process the data
        try:
            _add_curve_metadata(df, curve_name)
            print("Data processing complete. Sample record:")
            print(df.head(1).to_dict('records')[0])
        except Exception as e:
            print(f"Error processing data: {e}")
            return

        # Upload to MongoDB
        try:
            _replace_curve_documents(collection, curve_name, df)
        except Exception as e:
            print(f"Error uploading to MongoDB: {e}")
            return

        # Show some statistics (reported separately so a summary failure is
        # not mistaken for a failed upload)
        try:
            _print_curve_summary(df)
        except Exception as e:
            print(f"Error summarising data: {e}")

    finally:
        # Runs on every exit path above, so the client is never leaked.
        if client is not None:
            client.close()
            print("MongoDB connection closed")

def test_mongodb_connection():
    """
    Test MongoDB connection before uploading

    Pings the server, lists the available databases and, when the target
    database exists, its collections. Connection settings are taken from the
    environment: MONGODB_URI is required (no credential is embedded in the
    code), MONGODB_DB defaults to 'energy_contracts'.

    Returns:
        bool: True when the ping and listings succeeded, False when
        MONGODB_URI is missing or any step raised.
    """
    mongodb_uri = os.getenv('MONGODB_URI')
    mongodb_db = os.getenv('MONGODB_DB', 'energy_contracts')
    if not mongodb_uri:
        print("MongoDB connection failed: MONGODB_URI is not set "
              "(define it in your environment or .env file)")
        return False

    client = None
    try:
        client = pymongo.MongoClient(mongodb_uri)

        # Test connection
        client.admin.command('ping')
        print("MongoDB connection successful!")

        # List databases
        databases = client.list_database_names()
        print(f"Available databases: {databases}")

        # Check if our database exists
        if mongodb_db in databases:
            collections = client[mongodb_db].list_collection_names()
            print(f"Collections in {mongodb_db}: {collections}")
        else:
            print(f"Database {mongodb_db} does not exist yet (will be created)")

        return True

    except Exception as e:
        print(f"MongoDB connection failed: {e}")
        return False

    finally:
        # Close on success and on failure alike so the client is not leaked.
        if client is not None:
            client.close()

if __name__ == "__main__":
    # Script entry point: check connectivity first and only upload when the
    # check passes.
    print("=== Price Curve Upload Script ===")
    print("1. Testing MongoDB connection...")

    connection_ok = test_mongodb_connection()
    if not connection_ok:
        print("Please check your MongoDB connection settings and try again.")
    else:
        print("\n2. Uploading price curve data...")
        upload_price_curves_to_mongodb()

    print("\n=== Script completed ===")

=== Price Curve Upload Script ===
1. Testing MongoDB connection...
MongoDB connection successful!
Available databases: ['energy_contracts', 'admin', 'local']
Collections in energy_contracts: ['contracts']

2. Uploading price curve data...
Connected to MongoDB: energy_contracts
Loaded CSV with 10200 records
Columns: ['profile', 'type', 'state', 'time', 'price']
Data processing complete. Sample record:
{'profile': 'baseload', 'type': 'Energy', 'state': 'NSW', 'time': '1/07/2024', 'price': 143.6, 'date': Timestamp('2024-07-01 00:00:00'), 'year': 2024, 'month': 7, 'month_name': 'July', 'curve': 'Aurora Jan 2025', 'uploadedAt': Timestamp('2025-06-04 19:56:26.671349'), 'updatedAt': Timestamp('2025-06-04 19:56:26.672942')}
Successfully uploaded 10200 records to MongoDB
Curve name: Aurora Jan 2025
Verification: 10200 records found in database for this curve

Data summary:
Profiles: ['baseload' 'solar' 'wind']
Types: ['Energy' 'green']
States: ['NSW' 'QLD' 'SA' 'VIC']
Date range: 2024-07-01 to 

In [2]:
import pymongo
from datetime import datetime
import os

def _count_green_profiles(collection):
    """Return a {profile: count} mapping for documents whose type is "green"."""
    from collections import Counter

    # Only the profile field is needed for counting, so project everything else away.
    cursor = collection.find({"type": "green"}, {"profile": 1, "_id": 0})
    return Counter(doc.get('profile', 'unknown') for doc in cursor)


def _print_green_profiles(heading, profile_counts):
    """Print heading followed by one "profile: count records" line per profile."""
    print(heading)
    for profile, count in profile_counts.items():
        print(f"  {profile}: {count} records")


def create_green_baseload_prices():
    """
    Create baseload Green price data by copying from solar Green prices

    Every document with type "green" and profile "solar" in the
    'price_curves' collection is copied (without its _id) as a new document
    with profile "baseload" and a 'createdAt' timestamp.

    If baseload green documents already exist the user is asked whether to
    delete them; answering anything other than "y" aborts without inserting,
    so the copy can never duplicate existing baseload records.

    Connection settings are taken from the environment: MONGODB_URI is
    required (no credential is embedded in the code), MONGODB_DB defaults to
    'energy_contracts'.

    Returns:
        None. Progress and failures are reported on stdout.
    """
    mongodb_uri = os.getenv('MONGODB_URI')
    mongodb_db = os.getenv('MONGODB_DB', 'energy_contracts')
    if not mongodb_uri:
        print("Error connecting to MongoDB: MONGODB_URI is not set "
              "(define it in your environment or .env file)")
        return

    # MongoDB connection
    try:
        client = pymongo.MongoClient(mongodb_uri)
        db = client[mongodb_db]
        collection = db['price_curves']

        print(f"Connected to MongoDB: {mongodb_db}")

    except Exception as e:
        print(f"Error connecting to MongoDB: {e}")
        return

    try:
        # 1. Check current Green data
        print("\n=== Current Green Price Data ===")
        profile_breakdown = _count_green_profiles(collection)
        print(f"Found {sum(profile_breakdown.values())} existing Green records")
        _print_green_profiles("Current Green profiles:", profile_breakdown)

        # 2. Find solar Green records to copy
        solar_green_records = list(collection.find({
            "type": "green",
            "profile": "solar"
        }))

        print(f"\nFound {len(solar_green_records)} Solar Green records to copy")

        if not solar_green_records:
            print("ERROR: No Solar Green records found!")
            available_profiles = collection.distinct("profile", {"type": "green"})
            print(f"Available Green profiles: {available_profiles}")
            return

        # 3. Check if baseload Green already exists
        baseload_filter = {"type": "green", "profile": "baseload"}
        existing_baseload = collection.count_documents(baseload_filter)

        if existing_baseload > 0:
            print(f"\nWARNING: {existing_baseload} baseload Green records already exist")
            response = input("Delete existing baseload Green records? (y/n): ")
            if response.lower() != 'y':
                # Inserting on top of the existing records would duplicate them.
                print("Aborted: existing baseload Green records kept, nothing inserted")
                return
            delete_result = collection.delete_many(baseload_filter)
            print(f"Deleted {delete_result.deleted_count} existing baseload Green records")

        # 4. Create new baseload Green records: same fields as the solar
        #    record minus _id (so MongoDB assigns a new one), with the profile
        #    switched and a single shared creation timestamp.
        created_at = datetime.now()
        new_baseload_records = [
            {
                **{key: value for key, value in solar_record.items() if key != '_id'},
                'profile': 'baseload',
                'createdAt': created_at,
            }
            for solar_record in solar_green_records
        ]

        print(f"\nPreparing to insert {len(new_baseload_records)} new baseload Green records")

        # Show sample of what will be created
        print("\nSample of new records:")
        for record in new_baseload_records[:3]:
            print(f"  {record['state']} - {record['year']}-{record['month']:02d} - ${record['price']}")

        # 5. Insert new records
        insert_result = collection.insert_many(new_baseload_records)
        print(f"\nSUCCESS: Inserted {len(insert_result.inserted_ids)} new baseload Green records")

        # 6. Verify results
        print("\n=== Final Green Price Data ===")
        _print_green_profiles("Final Green profiles:", _count_green_profiles(collection))

        # 7. Show available states for baseload Green
        baseload_states = collection.distinct("state", baseload_filter)
        print(f"\nAvailable states for Green baseload: {', '.join(baseload_states)}")

        # 8. Test queries that your MtM will use
        print("\n=== Testing MtM Query Patterns ===")

        test_queries = [
            {"type": "green", "profile": "baseload"},
            {"type": "Green", "profile": "baseload"},  # Capitalized
        ]

        for query in test_queries:
            count = collection.count_documents(query)
            print(f"Query {query}: {count} records found")

        print("\nYour MtM should now find Green prices using these keys:")
        print("- 'green - baseload'")
        print("- 'Green - baseload'")
        print("- 'green' (if you modify lookup to default to baseload)")

    except Exception as e:
        print(f"Error processing data: {e}")

    finally:
        client.close()
        print("\nMongoDB connection closed")

if __name__ == "__main__":
    # Script entry point: opening banner, run the copy job, closing banner.
    opening_banner = "=== Green Baseload Price Creator ==="
    closing_banner = "\n=== Script completed ==="

    print(opening_banner)
    create_green_baseload_prices()
    print(closing_banner)

=== Green Baseload Price Creator ===
Connected to MongoDB: energy_contracts

=== Current Green Price Data ===
Found 4080 existing Green records
Current Green profiles:
  solar: 2040 records
  wind: 2040 records

Found 2040 Solar Green records to copy

Preparing to insert 2040 new baseload Green records

Sample of new records:
  QLD - 2024-07 - $38.0
  SA - 2024-07 - $38.0
  VIC - 2024-07 - $38.0

SUCCESS: Inserted 2040 new baseload Green records

=== Final Green Price Data ===
Final Green profiles:
  solar: 2040 records
  wind: 2040 records
  baseload: 2040 records

Available states for Green baseload: NSW, QLD, SA, VIC

=== Testing MtM Query Patterns ===
Query {'type': 'green', 'profile': 'baseload'}: 2040 records found
Query {'type': 'Green', 'profile': 'baseload'}: 0 records found

Your MtM should now find Green prices using these keys:
- 'green - baseload'
- 'Green - baseload'
- 'green' (if you modify lookup to default to baseload)

MongoDB connection closed

=== Script completed ===