In [1]:
import pandas as pd
import pymongo
from datetime import datetime
import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

def upload_price_intervals_to_mongodb():
    """
    Upload price intervals data from CSV to MongoDB price_curves_intervals collection.

    Steps:
      1. Open a MongoDB client using configuration from the environment.
      2. Read 'price_monthly_jan_2025.csv' from the working directory.
      3. Derive 'date', 'year', 'month_num' and 'month_name' from the
         'Interval_date' column (parsed as day/month/year), rename
         Region -> state, ContracType -> type, price_real_$ -> price,
         Interval_date -> time, and tag every row with the curve name and a
         single upload timestamp.
      4. Delete any documents already stored for the curve, insert the new
         records, and print a short summary.

    Environment:
      MONGODB_URI  connection string (required; it carries credentials, so it
                   is never defaulted from a literal in the source).
      MONGODB_DB   database name, defaults to 'energy_contracts'.

    Returns:
        None. Progress and failures are reported with print(); exceptions
        from each stage are caught and printed rather than propagated.
    """
    # Credentials must come from the environment / .env file only.
    mongodb_uri = os.getenv('MONGODB_URI')
    if not mongodb_uri:
        print("Error connecting to MongoDB: MONGODB_URI is not set "
              "(define it in the environment or in the .env file)")
        return
    mongodb_db = os.getenv('MONGODB_DB', 'energy_contracts')

    # MongoDB connection
    try:
        client = pymongo.MongoClient(mongodb_uri)
        collection = client[mongodb_db]['price_curves_intervals']
        print(f"Connected to MongoDB: {mongodb_db}")
    except Exception as e:
        print(f"Error connecting to MongoDB: {e}")
        return

    # The client now exists: the outer try/finally closes it on every exit
    # path, including the early returns taken when a stage below fails.
    try:
        # Read the CSV file
        try:
            # Update this path to your new CSV file location
            csv_file_path = 'price_monthly_jan_2025.csv'
            df = pd.read_csv(csv_file_path)

            print(f"Loaded CSV with {len(df)} records")
            print(f"Columns: {list(df.columns)}")

            # Show sample data
            print("Sample data (first 3 rows):")
            print(df.head(3).to_string())
        except Exception as e:
            print(f"Error reading CSV file: {e}")
            return

        # Stop before touching the collection: with no rows there is nothing
        # to insert and the existing curve data must be left alone.
        if df.empty:
            print("CSV file contains no records; nothing to upload")
            return

        # Process the data
        try:
            curve_name = "Aurora Jan 2025 Intervals"

            # Convert Interval_date column to datetime (day/month/year)
            df['date'] = pd.to_datetime(df['Interval_date'], format='%d/%m/%Y')
            df['year'] = df['date'].dt.year
            df['month_num'] = df['date'].dt.month
            df['month_name'] = df['date'].dt.strftime('%B')

            # Rename columns to match the existing document structure.
            # FY, CY, Month, period_30 and Scenario are kept under their
            # original names.
            df_processed = df.rename(columns={
                'Region': 'state',
                'ContracType': 'type',
                'price_real_$': 'price',
                'Interval_date': 'time'
            })

            # One timestamp for both fields so they are equal on first upload.
            upload_time = datetime.now()
            df_processed['curve'] = curve_name
            df_processed['uploadedAt'] = upload_time
            df_processed['updatedAt'] = upload_time

            print("Data processing complete. Sample processed record:")
            sample_record = df_processed.head(1).to_dict('records')[0]
            print(sample_record)
        except Exception as e:
            print(f"Error processing data: {e}")
            return

        # Upload to MongoDB
        try:
            # Clear existing data for this curve if it exists.
            # NOTE(review): delete and insert are two separate operations; if
            # the insert fails the curve is left empty until the next run.
            existing_count = collection.count_documents({'curve': curve_name})
            if existing_count > 0:
                print(f"Found {existing_count} existing records for curve '{curve_name}'. Deleting...")
                collection.delete_many({'curve': curve_name})

            # Convert DataFrame to list of dictionaries and insert
            records = df_processed.to_dict('records')
            result = collection.insert_many(records)

            print(f"Successfully uploaded {len(result.inserted_ids)} records to MongoDB")
            print(f"Curve name: {curve_name}")

            # Verify upload
            total_records = collection.count_documents({'curve': curve_name})
            print(f"Verification: {total_records} records found in database for this curve")

            # Show some statistics
            print("\nData summary:")
            states = df_processed['state'].unique()
            types = df_processed['type'].unique()
            scenarios = df_processed['Scenario'].unique()
            fy_range = f"{df_processed['FY'].min()} to {df_processed['FY'].max()}"
            date_range = f"{df_processed['date'].min().strftime('%Y-%m-%d')} to {df_processed['date'].max().strftime('%Y-%m-%d')}"

            print(f"States: {states}")
            print(f"Contract Types: {types}")
            print(f"Scenarios: {scenarios}")
            print(f"Financial Years: {fy_range}")
            print(f"Date range: {date_range}")
            print(f"Total combinations: {len(states)} states × {len(types)} types × {len(scenarios)} scenarios")
        except Exception as e:
            print(f"Error uploading to MongoDB: {e}")
            return
    finally:
        client.close()
        print("MongoDB connection closed")

def test_mongodb_connection():
    """
    Test MongoDB connection before uploading.

    Pings the server, prints the available database names and, when the
    target database already exists, the collections it contains.

    Environment:
      MONGODB_URI  connection string (required; it carries credentials, so it
                   is never defaulted from a literal in the source).
      MONGODB_DB   database name, defaults to 'energy_contracts'.

    Returns:
        True when the ping and the listing calls succeed, False when
        MONGODB_URI is missing or any step raises. Errors are printed, not
        propagated.
    """
    # Credentials must come from the environment / .env file only.
    mongodb_uri = os.getenv('MONGODB_URI')
    if not mongodb_uri:
        print("MongoDB connection failed: MONGODB_URI is not set "
              "(define it in the environment or in the .env file)")
        return False
    mongodb_db = os.getenv('MONGODB_DB', 'energy_contracts')

    client = None
    try:
        client = pymongo.MongoClient(mongodb_uri)

        # Test connection
        client.admin.command('ping')
        print("MongoDB connection successful!")

        # List databases
        databases = client.list_database_names()
        print(f"Available databases: {databases}")

        # Check if our database exists
        if mongodb_db in databases:
            collections = client[mongodb_db].list_collection_names()
            print(f"Collections in {mongodb_db}: {collections}")
        else:
            print(f"Database {mongodb_db} does not exist yet (will be created)")

        return True

    except Exception as e:
        print(f"MongoDB connection failed: {e}")
        return False

    finally:
        # Close on the failure path too, so a failed ping does not leave the
        # client open.
        if client is not None:
            client.close()

if __name__ == "__main__":
    # Script entry point: check connectivity first and upload only when the
    # check passes; the closing banner is printed in both cases.
    print("=== Price Curves Intervals Upload Script ===")
    print("1. Testing MongoDB connection...")

    connection_ok = test_mongodb_connection()
    if not connection_ok:
        print("Please check your MongoDB connection settings and try again.")
    else:
        print("\n2. Uploading price intervals data...")
        upload_price_intervals_to_mongodb()

    print("\n=== Script completed ===")

=== Price Curves Intervals Upload Script ===
1. Testing MongoDB connection...
MongoDB connection successful!
Available databases: ['energy_contracts', 'admin', 'local']
Collections in energy_contracts: ['contracts', 'price_curves']

2. Uploading price intervals data...
Connected to MongoDB: energy_contracts
Loaded CSV with 2592 records
Columns: ['Region', 'ContracType', 'FY', 'CY', 'Month', 'Interval_date', 'period_30', 'Scenario', 'price_real_$']
Sample data (first 3 rows):
  Region ContracType    FY    CY  Month Interval_date period_30 Scenario  price_real_$
0    NSW      Energy  2025  2024      7     1/07/2024       all  Central        126.86
1    NSW      Energy  2025  2024      8     1/08/2024       all  Central        120.72
2    NSW      Energy  2025  2024      9     1/09/2024       all  Central        102.02
Data processing complete. Sample processed record:
{'state': 'NSW', 'type': 'Energy', 'FY': 2025, 'CY': 2024, 'Month': 7, 'time': '1/07/2024', 'period_30': 'all', 'Scenario