In [14]:
import pandas as pd
import requests
import json
from datetime import datetime, timedelta
from typing import Dict, List, Any
import re
import calendar

class ContractDataImporter:
    def __init__(self, api_base_url: str = "http://localhost:3000/api/contracts"):
        self.api_base_url = api_base_url
        self.contracts_cache = {}
        
    def load_csv(self, filepath: str) -> pd.DataFrame:
        """Load and parse the CSV file"""
        try:
            df = pd.read_csv(filepath)
            print(f"Loaded CSV with {len(df)} rows and columns: {list(df.columns)}")
            return df
        except Exception as e:
            print(f"Error loading CSV: {e}")
            return None
    
    def clean_and_validate_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """Clean and validate the CSV data"""
        # Clean column names by stripping whitespace
        df.columns = df.columns.str.strip()
        
        # Print actual columns for debugging
        print(f"Actual CSV columns (after cleaning): {list(df.columns)}")
        
        # Map columns to find the right ones based on new structure
        column_mapping = {}
        
        # Required columns mapping for new CSV structure
        required_mappings = {
            'Trade': 'Trade',  # New column for buy/sell
            'New_deal_name': 'New_deal_name',
            'State': 'State', 
            'type': 'type',
            'Year': 'Year',
            'Month': 'Month',
            'Volume': 'Volume',
            'AvestrikePrice': 'AvestrikePrice',  # Changed from Price_CY
            'unit': 'unit',
            'ContractType': 'ContractType',  # New column
            'Sub_Type': 'Sub_Type',
            'StartDate': 'StartDate',  # New column
            'EndDate': 'EndDate'       # New column
        }
        
        # Find matching columns
        for standard_name, csv_name in required_mappings.items():
            matching_cols = [col for col in df.columns if csv_name.lower() == col.lower()]
            if matching_cols:
                column_mapping[standard_name] = matching_cols[0]
            else:
                print(f"Warning: Could not find column matching '{csv_name}'")
        
        # Check if we found all critical columns
        critical_columns = ['New_deal_name', 'State', 'type', 'Year', 'Month', 'Volume', 'AvestrikePrice']
        missing_critical = [col for col in critical_columns if col not in column_mapping]
        
        if missing_critical:
            print(f"Missing critical columns: {missing_critical}")
            return pd.DataFrame()  # Return empty dataframe if critical columns missing
        
        # Clean data
        df_clean = df.copy()
        
        # Rename columns to standard names for easier processing
        df_clean = df_clean.rename(columns={v: k for k, v in column_mapping.items()})
        
        # Convert Volume to numeric, handling commas and dashes
        if 'Volume' in df_clean.columns:
            print(f"Processing Volume column...")
            df_clean['Volume'] = df_clean['Volume'].astype(str).str.replace(',', '').str.replace('-', '0').str.strip()
            df_clean['Volume'] = pd.to_numeric(df_clean['Volume'], errors='coerce').fillna(0)
            print(f"Sample Volume data: {df_clean['Volume'].head().tolist()}")
        
        # Convert AvestrikePrice to numeric
        if 'AvestrikePrice' in df_clean.columns:
            df_clean['AvestrikePrice'] = df_clean['AvestrikePrice'].astype(str).str.replace(',', '').str.strip()
            df_clean['AvestrikePrice'] = pd.to_numeric(df_clean['AvestrikePrice'], errors='coerce').fillna(0)
        
        # Remove rows with missing deal names
        df_clean = df_clean.dropna(subset=['New_deal_name'])
        
        print(f"Cleaned data: {len(df_clean)} rows remaining")
        return df_clean
    
    def parse_date_from_row(self, row) -> str:
        """Parse date from Year and Month columns"""
        try:
            year = int(row['Year'])
            month = int(row['Month'])
            # Create first day of the month as timestamp
            date = datetime(year, month, 1)
            return date.isoformat()
        except:
            return datetime.now().isoformat()
    
    def get_hours_in_period(self, year: int, month: int) -> int:
        """Get the number of hours in a given month"""
        days_in_month = calendar.monthrange(year, month)[1]
        return days_in_month * 24
    
    def convert_volume_for_contract_type(self, volume_mwh: float, contract_type: str, year: int, month: int) -> float:
        """Convert MWh to appropriate unit based on contract type"""
        if contract_type == 'wholesale':
            # Convert MWh to MW by dividing by hours in the period
            hours_in_period = self.get_hours_in_period(year, month)
            volume_mw = volume_mwh / hours_in_period
            return round(volume_mw, 3)  # Round to 3 decimal places for MW
        else:
            # Retail and offtake contracts keep MWh
            return round(volume_mwh, 0)  # Round to whole MWh
    
    def get_volume_unit(self, contract_type: str) -> str:
        """Get the appropriate volume unit based on contract type"""
        if contract_type == 'wholesale':
            return 'MW'
        else:
            return 'MWh'
    
    def map_contract_type(self, csv_type: str) -> str:
        """Map CSV type to contract type"""
        type_mapping = {
            'Retail': 'retail',
            'Wholesale': 'wholesale', 
            'Offtake': 'offtake'
        }
        return type_mapping.get(csv_type, 'retail')
    
    def map_trade_direction(self, trade_value: str) -> str:
        """Map Trade column value to standard buy/sell format"""
        if not trade_value or pd.isna(trade_value):
            return 'sell'  # Default to sell
        
        trade_lower = str(trade_value).lower().strip()
        if 'buy' in trade_lower or 'purchase' in trade_lower:
            return 'buy'
        else:
            return 'sell'
    
    def map_contract_category(self, contract_type: str, deal_name: str, sub_type: str = None) -> str:
        """Determine contract category based on type, deal name, and sub_type"""
        if contract_type == 'retail':
            if 'Government' in deal_name or 'Gov' in deal_name:
                return 'Government Customer'
            elif 'Industrial' in deal_name or 'Mining' in deal_name:
                return 'Industrial Customer'
            else:
                return 'Retail Customer'
        elif contract_type == 'wholesale':
            # Use Sub_Type for wholesale contracts
            if sub_type and sub_type.strip():
                return sub_type.strip()
            else:
                return 'Swap'  # Default fallback
        else:  # offtake
            if 'Solar' in deal_name:
                return 'Solar Farm'
            elif 'Wind' in deal_name:
                return 'Wind Farm'
            else:
                return 'Solar Farm'
    
    def group_data_by_contract(self, df: pd.DataFrame) -> Dict[str, List[Dict]]:
        """Group data by unique deal name (aggregating all years) and contract type"""
        contracts_data = {}
        
        for _, row in df.iterrows():
            deal_name = row['New_deal_name']
            year = row['Year']
            contract_type_from_csv = row.get('ContractType', 'Energy')  # Energy or LGC
            sub_type = row.get('Sub_Type', '')  # Get Sub_Type for wholesale contracts
            contract_type = self.map_contract_type(row['type'])
            trade_direction = self.map_trade_direction(row.get('Trade', 'Sell'))
            
            # Create unique key for contract (deal name + contract type from CSV)
            contract_key = f"{deal_name}_{contract_type_from_csv}"
            
            if contract_key not in contracts_data:
                contracts_data[contract_key] = {
                    'deal_name': deal_name,
                    'contract_type_csv': contract_type_from_csv,  # Energy or LGC
                    'sub_type': sub_type,
                    'state': row['State'],
                    'type': contract_type,  # retail, wholesale, offtake
                    'trade_direction': trade_direction,  # buy or sell
                    'time_series_data': [],
                    'years_covered': set(),
                    'contract_info': {
                        'start_date': row.get('StartDate', ''),
                        'end_date': row.get('EndDate', ''),
                        'unit': row.get('unit', 'MWh')
                    }
                }
            
            # Add year to covered years
            contracts_data[contract_key]['years_covered'].add(year)
            
            # Convert volume based on contract type
            original_volume_mwh = float(row['Volume'])
            converted_volume = self.convert_volume_for_contract_type(
                original_volume_mwh, contract_type, year, row['Month']
            )
            
            # Add time series point
            timestamp = self.parse_date_from_row(row)
            period = f"{year}-{str(row['Month']).zfill(2)}"
            
            time_point = {
                'timestamp': timestamp,
                'volume': converted_volume,
                'price': round(float(row['AvestrikePrice']), 2),  # Round to 2 decimal places
                'period': period,
                'periodType': 'monthly'
            }
            
            contracts_data[contract_key]['time_series_data'].append(time_point)
        
        # Convert years_covered set to sorted list for better display
        for contract_key in contracts_data:
            years_list = sorted(list(contracts_data[contract_key]['years_covered']))
            contracts_data[contract_key]['years_covered'] = years_list
            volume_unit = self.get_volume_unit(contracts_data[contract_key]['type'])
            trade_dir = contracts_data[contract_key]['trade_direction']
            print(f"Contract: {contracts_data[contract_key]['deal_name']} ({contracts_data[contract_key]['contract_type_csv']}) - Type: {contracts_data[contract_key]['type']} - Unit: {volume_unit} - Trade: {trade_dir} - Years: {years_list}")
        
        print(f"Grouped data into {len(contracts_data)} unique contracts")
        return contracts_data
    
    def calculate_annual_volume(self, time_series: List[Dict], years_covered: List[int], contract_type: str) -> float:
        """Calculate volume for one full calendar year if data exists, otherwise return total"""
        if not time_series:
            return 0
        
        # Group time series by year
        yearly_volumes = {}
        for point in time_series:
            timestamp = datetime.fromisoformat(point['timestamp'])
            year = timestamp.year
            
            if year not in yearly_volumes:
                yearly_volumes[year] = {'volume': 0, 'months': set()}
            
            yearly_volumes[year]['volume'] += point['volume']
            yearly_volumes[year]['months'].add(timestamp.month)
        
        # Check if we have any complete year (12 months of data)
        complete_years = []
        for year, data in yearly_volumes.items():
            if len(data['months']) == 12:  # Full calendar year
                complete_years.append((year, data['volume']))
        
        if complete_years:
            # Return volume from the most recent complete year
            complete_years.sort(key=lambda x: x[0], reverse=True)  # Sort by year, newest first
            volume = complete_years[0][1]
        else:
            # No complete year found, return total volume for all available data
            volume = sum(point['volume'] for point in time_series)
        
        # Round appropriately based on contract type
        if contract_type == 'wholesale':
            return round(volume, 3)  # MW to 3 decimal places
        else:
            return round(volume, 0)  # MWh to whole numbers
    
    def clear_database(self) -> bool:
        """Clear all contracts from the database"""
        try:
            # First fetch all contracts to get their IDs
            response = requests.get(self.api_base_url)
            if response.status_code != 200:
                print(f"Error fetching contracts for deletion: {response.status_code}")
                return False
            
            contracts = response.json()
            print(f"Found {len(contracts)} contracts to delete...")
            
            deleted_count = 0
            error_count = 0
            
            for contract in contracts:
                contract_id = contract.get('_id') or contract.get('id')
                if contract_id:
                    try:
                        delete_response = requests.delete(f"{self.api_base_url}?id={contract_id}")
                        if delete_response.status_code == 200:
                            deleted_count += 1
                            print(f"  ✓ Deleted: {contract.get('name', 'Unknown')}")
                        else:
                            error_count += 1
                            print(f"  ✗ Error deleting {contract.get('name', 'Unknown')}: {delete_response.status_code}")
                    except Exception as e:
                        error_count += 1
                        print(f"  ✗ Error deleting {contract.get('name', 'Unknown')}: {e}")
                else:
                    error_count += 1
                    print(f"  ✗ No ID found for contract: {contract.get('name', 'Unknown')}")
            
            print(f"\nDatabase clearing summary:")
            print(f"  Deleted: {deleted_count}")
            print(f"  Errors: {error_count}")
            print(f"  Total processed: {len(contracts)}")
            
            return error_count == 0
            
        except Exception as e:
            print(f"Error clearing database: {e}")
            return False
    
    def fetch_existing_contracts(self) -> List[Dict]:
        """Fetch existing contracts from API"""
        try:
            response = requests.get(self.api_base_url)
            if response.status_code == 200:
                contracts = response.json()
                print(f"Fetched {len(contracts)} existing contracts")
                return contracts
            else:
                print(f"Error fetching contracts: {response.status_code}")
                return []
        except Exception as e:
            print(f"Error fetching contracts: {e}")
            return []
    
    def find_matching_contract(self, deal_name: str, existing_contracts: List[Dict]) -> Dict:
        """Find existing contract that matches the deal name"""
        for contract in existing_contracts:
            if contract['name'].lower() == deal_name.lower():
                return contract
        return None
    
    def calculate_annual_strike_prices(self, time_series: List[Dict]) -> Dict[int, float]:
        """Calculate volume-weighted average strike price for each calendar year"""
        if not time_series:
            return {}
        
        # Group by year and calculate volume-weighted averages
        yearly_data = {}
        for point in time_series:
            timestamp = datetime.fromisoformat(point['timestamp'])
            year = timestamp.year
            
            if year not in yearly_data:
                yearly_data[year] = {'total_value': 0, 'total_volume': 0}
            
            # Volume-weighted calculation
            value = point['price'] * point['volume']
            yearly_data[year]['total_value'] += value
            yearly_data[year]['total_volume'] += point['volume']
        
        # Calculate weighted averages
        annual_prices = {}
        for year, data in yearly_data.items():
            if data['total_volume'] > 0:
                annual_prices[year] = round(data['total_value'] / data['total_volume'], 2)
            else:
                annual_prices[year] = 0
        
        return annual_prices

    def get_representative_strike_price(self, time_series: List[Dict], years_covered: List[int]) -> float:
        """Get a representative strike price for the contract"""
        annual_prices = self.calculate_annual_strike_prices(time_series)
        
        if not annual_prices:
            return 0
        
        # If we have multiple years, use the most recent complete year's price
        # Otherwise use the average across all available data
        if len(annual_prices) == 1:
            return list(annual_prices.values())[0]
        else:
            # Return the most recent year's price
            most_recent_year = max(annual_prices.keys())
            return annual_prices[most_recent_year]

    def create_new_contract(self, contract_data: Dict) -> Dict:
        """Create a new contract from grouped data"""
        deal_name = contract_data['deal_name']
        contract_type_csv = contract_data['contract_type_csv']
        sub_type = contract_data.get('sub_type', '')
        time_series = contract_data['time_series_data']
        years_covered = contract_data['years_covered']
        contract_type = contract_data['type']
        trade_direction = contract_data['trade_direction']
        contract_info = contract_data['contract_info']
        
        # Get appropriate volume unit
        volume_unit = self.get_volume_unit(contract_type)
        
        # Process time series data similar to volume editor
        # Sort time series by timestamp to ensure proper ordering
        time_series_sorted = sorted(time_series, key=lambda x: x['timestamp'])
        
        # Calculate aggregated values
        total_volume = sum(point['volume'] for point in time_series_sorted)
        
        # Calculate representative strike price and annual prices
        strike_price = self.get_representative_strike_price(time_series_sorted, years_covered)
        annual_strike_prices = self.calculate_annual_strike_prices(time_series_sorted)
        
        # Calculate annual volume - try to get one full calendar year if data exists
        annual_volume = self.calculate_annual_volume(time_series_sorted, years_covered, contract_type)
        
        # Use EndDate from CSV if available, otherwise calculate from time series
        if contract_info.get('end_date') and contract_info['end_date'].strip():
            end_date = contract_info['end_date'].strip()
            # Convert to YYYY-MM-DD format if needed
            try:
                # Try parsing different date formats and convert to YYYY-MM-DD
                if '/' in end_date:
                    # Handle DD/MM/YYYY or MM/DD/YYYY format
                    date_obj = datetime.strptime(end_date, '%d/%m/%Y')
                    end_date = date_obj.strftime('%Y-%m-%d')
                elif len(end_date) == 10 and end_date[4] == '-':
                    # Already in YYYY-MM-DD format
                    pass
                else:
                    # Try other formats if needed
                    date_obj = datetime.strptime(end_date, '%Y-%m-%d')
                    end_date = date_obj.strftime('%Y-%m-%d')
            except:
                # If parsing fails, fall back to time series calculation
                timestamps = [datetime.fromisoformat(point['timestamp']) for point in time_series_sorted]
                end_date = max(timestamps).strftime('%Y-%m-%d') if timestamps else f"{max(years_covered)}-12-31"
        else:
            # Fall back to calculating from time series
            timestamps = [datetime.fromisoformat(point['timestamp']) for point in time_series_sorted]
            end_date = max(timestamps).strftime('%Y-%m-%d') if timestamps else f"{max(years_covered)}-12-31"
        
        # Use StartDate from CSV if available, otherwise calculate from time series
        if contract_info.get('start_date') and contract_info['start_date'].strip():
            start_date = contract_info['start_date'].strip()
            # Convert to YYYY-MM-DD format if needed
            try:
                if '/' in start_date:
                    # Handle DD/MM/YYYY or MM/DD/YYYY format
                    date_obj = datetime.strptime(start_date, '%d/%m/%Y')
                    start_date = date_obj.strftime('%Y-%m-%d')
                elif len(start_date) == 10 and start_date[4] == '-':
                    # Already in YYYY-MM-DD format
                    pass
                else:
                    # Try other formats if needed
                    date_obj = datetime.strptime(start_date, '%Y-%m-%d')
                    start_date = date_obj.strftime('%Y-%m-%d')
            except:
                # If parsing fails, fall back to time series calculation
                timestamps = [datetime.fromisoformat(point['timestamp']) for point in time_series_sorted]
                start_date = min(timestamps).strftime('%Y-%m-%d') if timestamps else f"{min(years_covered)}-01-01"
        else:
            # Fall back to calculating from time series
            timestamps = [datetime.fromisoformat(point['timestamp']) for point in time_series_sorted]
            start_date = min(timestamps).strftime('%Y-%m-%d') if timestamps else f"{min(years_covered)}-01-01"
        
        # Create contract name that includes contract type if not Energy
        contract_name = f"{deal_name} ({contract_type_csv})" if contract_type_csv != 'Energy' else deal_name
        
        new_contract = {
            'name': contract_name,
            'type': contract_type,
            'category': self.map_contract_category(contract_type, deal_name, sub_type),
            'state': contract_data['state'],
            'counterparty': deal_name,  # Use deal name as counterparty
            'startDate': start_date,
            'endDate': end_date,
            'annualVolume': annual_volume,  # Volume for one full CY if available, otherwise total for available data
            'strikePrice': strike_price,  # Representative strike price
            'unit': volume_unit,  # Use MW for wholesale, MWh for others
            'volumeShape': 'custom',  # Always custom since we have time series data
            'status': 'active',
            'indexation': 'Fixed',
            'referenceDate': start_date,
            'pricingType': 'timeseries',
            'timeSeriesData': time_series_sorted,  # Store all monthly data points
            'dataSource': 'csv_import',
            'yearsCovered': years_covered,
            'totalVolume': total_volume,
            'contractType': contract_type_csv,  # Energy or LGC
            'tradeDirection': trade_direction,  # buy or sell
            'annualStrikePrices': annual_strike_prices  # One strike price per calendar year
        }
        
        return new_contract
    
    def update_contract_with_timeseries(self, contract_id: str, contract_data: Dict) -> bool:
        """Update existing contract with time series data and pricing using PATCH endpoint"""
        try:
            # Calculate annual strike prices for the update
            time_series = contract_data['time_series_data']
            annual_strike_prices = self.calculate_annual_strike_prices(time_series)
            representative_price = self.get_representative_strike_price(time_series, contract_data['years_covered'])
            
            payload = {
                'action': 'updateTimeSeries',
                'contractId': contract_id,
                'timeSeriesData': time_series,
                'dataSource': 'csv_import',
                'totalVolume': contract_data['total_volume'],
                'yearsCovered': contract_data['years_covered'],
                'strikePrice': representative_price,  # Update main strike price
                'annualStrikePrices': annual_strike_prices,  # Add annual prices
                'tradeDirection': contract_data['trade_direction']  # Update trade direction
            }
            
            response = requests.patch(self.api_base_url, json=payload)
            
            if response.status_code == 200:
                print(f"Successfully updated contract {contract_id} with time series data and pricing")
                return True
            else:
                print(f"Error updating contract {contract_id}: {response.status_code} - {response.text}")
                return False
        except Exception as e:
            print(f"Error updating contract {contract_id}: {e}")
            return False
    
    def create_contract_via_api(self, contract_data: Dict) -> str:
        """Create new contract via API and return contract ID"""
        try:
            response = requests.post(self.api_base_url, json=contract_data)
            
            if response.status_code == 201:
                new_contract = response.json()
                contract_id = new_contract.get('_id')
                print(f"Successfully created contract: {contract_data['name']} (ID: {contract_id})")
                return contract_id
            else:
                print(f"Error creating contract {contract_data['name']}: {response.status_code} - {response.text}")
                return None
        except Exception as e:
            print(f"Error creating contract {contract_data['name']}: {e}")
            return None
    
    def process_csv_file(self, filepath: str, dry_run: bool = True, clear_db: bool = False):
        """Main method to process the CSV file and update contracts"""
        print(f"Starting CSV import process (dry_run={dry_run}, clear_db={clear_db})...")
        
        # Clear database if requested
        if clear_db:
            print("\n" + "="*50)
            print("CLEARING DATABASE")
            print("="*50)
            if not dry_run:
                success = self.clear_database()
                if not success:
                    print("Database clearing failed. Aborting import.")
                    return
                print("Database cleared successfully!")
            else:
                print("Would clear all existing contracts from database")
            print()
        
        # Load and clean data
        df = self.load_csv(filepath)
        if df is None:
            return
        
        df_clean = self.clean_and_validate_data(df)
        if len(df_clean) == 0:
            print("No valid data to process")
            return
        
        # Group data by contract
        contracts_data = self.group_data_by_contract(df_clean)
        
        # Fetch existing contracts (will be empty if database was cleared)
        existing_contracts = self.fetch_existing_contracts() if not clear_db or dry_run else []
        
        # Process each contract
        created_count = 0
        updated_count = 0
        error_count = 0
        
        for contract_key, contract_data in contracts_data.items():
            deal_name = contract_data['deal_name']
            contract_type_csv = contract_data['contract_type_csv']
            years_covered = contract_data['years_covered']
            time_series = contract_data['time_series_data']
            contract_type = contract_data['type']
            trade_direction = contract_data['trade_direction']
            volume_unit = self.get_volume_unit(contract_type)
            
            print(f"\nProcessing: {deal_name} ({contract_type_csv}) - Type: {contract_type} - Unit: {volume_unit} - Trade: {trade_direction} - Years: {years_covered} - {len(time_series)} data points")
            
            # Check if contract exists (look for deal name with or without contract type suffix)
            existing_contract = self.find_matching_contract(deal_name, existing_contracts)
            if not existing_contract and contract_type_csv != 'Energy':
                # Also try looking for contract name with contract type suffix
                contract_name_with_type = f"{deal_name} ({contract_type_csv})"
                existing_contract = self.find_matching_contract(contract_name_with_type, existing_contracts)
            
            if existing_contract:
                # Update existing contract with time series data
                print(f"  → Found existing contract: {existing_contract['name']}")
                
                # Prepare contract data for update
                update_data = {
                    'time_series_data': time_series,
                    'total_volume': sum(point['volume'] for point in time_series),
                    'years_covered': years_covered,
                    'trade_direction': trade_direction
                }
                
                if not dry_run:
                    success = self.update_contract_with_timeseries(
                        existing_contract['_id'], 
                        update_data
                    )
                    if success:
                        updated_count += 1
                    else:
                        error_count += 1
                else:
                    print(f"  → Would update existing contract with {len(time_series)} time series points")
                    annual_prices = self.calculate_annual_strike_prices(time_series)
                    print(f"     Annual Strike Prices: {annual_prices}")
                    print(f"     Trade Direction: {trade_direction}")
                    updated_count += 1
            else:
                # Create new contract
                print(f"  → No existing contract found, will create new one")
                new_contract_data = self.create_new_contract(contract_data)
                
                if not dry_run:
                    contract_id = self.create_contract_via_api(new_contract_data)
                    if contract_id:
                        created_count += 1
                    else:
                        error_count += 1
                else:
                    print(f"  → Would create new contract: {new_contract_data['name']}")
                    print(f"     Type: {new_contract_data['type']}, State: {new_contract_data['state']}, Unit: {new_contract_data['unit']}")
                    print(f"     Trade Direction: {new_contract_data['tradeDirection']}, Contract Type: {new_contract_data['contractType']}")
                    print(f"     Annual Volume: {new_contract_data['annualVolume']:,.3f} {volume_unit}, Strike Price: ${new_contract_data['strikePrice']:.2f}")
                    print(f"     Total Volume: {new_contract_data['totalVolume']:,.3f} {volume_unit}, Time Series Points: {len(time_series)}, Years: {years_covered}")
                    print(f"     Annual Strike Prices: {new_contract_data['annualStrikePrices']}")
                    print(f"     Start Date: {new_contract_data['startDate']}, End Date: {new_contract_data['endDate']}")
                    created_count += 1
        
        # Summary
        print(f"\n{'='*50}")
        print(f"IMPORT SUMMARY ({'DRY RUN' if dry_run else 'LIVE RUN'})")
        print(f"{'='*50}")
        print(f"Contracts created: {created_count}")
        print(f"Contracts updated: {updated_count}")
        print(f"Errors: {error_count}")
        print(f"Total processed: {len(contracts_data)}")
        
        if dry_run:
            print(f"\nThis was a dry run. To execute the import, run with dry_run=False")

# Example usage
if __name__ == "__main__":
    importer = ContractDataImporter("http://localhost:3000/api/contracts")
    
    # First run a dry run to see what would happen
    print("Running dry run...")
    importer.process_csv_file("contracts.csv", dry_run=False, clear_db=True)
    
    # Uncomment the line below to execute the actual import
    # importer.process_csv_file("contracts.csv", dry_run=False, clear_db=False)

Running dry run...
Starting CSV import process (dry_run=False, clear_db=True)...

CLEARING DATABASE
Found 412 contracts to delete...
  ✓ Deleted: BVC TOU
  ✓ Deleted: BVC TOU (Green)
  ✓ Deleted: CSIRO TOU
  ✓ Deleted: CSIRO TOU (Green)
  ✓ Deleted: Fairfield Non-TOU
  ✓ Deleted: Fairfield Non-TOU (Green)
  ✓ Deleted: Fairfield TOU
  ✓ Deleted: Fairfield TOU (Green)
  ✓ Deleted: Occupier Non-TOU
  ✓ Deleted: SBS TOU
  ✓ Deleted: SBS TOU (Green)
  ✓ Deleted: SSROC Non-TOU
  ✓ Deleted: SSROC Non-TOU (Green)
  ✓ Deleted: SSROC TOU
  ✓ Deleted: SSROC TOU (Green)
  ✓ Deleted: ISPT Non-TOU
  ✓ Deleted: ISPT Non-TOU (Green)
  ✓ Deleted: ISPT TOU
  ✓ Deleted: ISPT TOU (Green)
  ✓ Deleted: Adelaide Metro Non-TOU
  ✓ Deleted: Adelaide Metro Non-TOU (Green)
  ✓ Deleted: Adelaide Metro TOU
  ✓ Deleted: Adelaide Metro TOU (Green)
  ✓ Deleted: Bunnings TOU
  ✓ Deleted: Bunnings TOU (Green)
  ✓ Deleted: CBUS TOU
  ✓ Deleted: CBUS TOU (Green)
  ✓ Deleted: Hentley Farm TOU
  ✓ Deleted: Hentley Farm TOU