In [None]:
import json
from datetime import datetime
from typing import Any, Dict, List, Optional

import pandas as pd
import requests

class LWPUpdater:
    """Attach LWP/DWP percentages from a CSV export to existing contracts.

    Workflow: load the CSV, reduce it to one LWP value per contract and
    ``YYYY-MM`` period, fetch the contracts exposed by the HTTP API, and
    send a PATCH per matching contract containing the per-period values plus
    overall and per-year volume-weighted averages.

    LWP values are handled as fractions throughout (25% is stored as 0.25).
    """

    # Year assigned to a time series point when neither its timestamp nor
    # its period yields a usable year.
    DEFAULT_YEAR = 2025

    def __init__(self, api_base_url: str = "http://localhost:3000/api/contracts",
                 timeout: float = 30.0):
        """Create an updater.

        Args:
            api_base_url: Endpoint used for both the GET (list contracts)
                and the PATCH (update contract) requests.
            timeout: Seconds to wait for each HTTP request before giving up.
        """
        self.api_base_url = api_base_url
        self.timeout = timeout

    def load_csv(self, filepath: str) -> Optional[pd.DataFrame]:
        """Load the CSV file.

        Returns:
            The parsed DataFrame, or ``None`` when the file cannot be read
            or parsed (the error is printed).
        """
        try:
            df = pd.read_csv(filepath)
        except (OSError, ValueError) as e:
            # pandas' parser/empty-data/decoding errors all derive from
            # ValueError; missing or unreadable files raise OSError.
            print(f"Error loading CSV: {e}")
            return None
        print(f"Loaded CSV with {len(df)} rows and columns: {list(df.columns)}")
        return df

    def clean_csv_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """Reduce the raw CSV to the matching columns plus ``LWP_Percentage``.

        The first column whose name contains ``lwp`` or ``dwp``
        (case-insensitive) is taken as the LWP column. Its values have ``%``
        and ``,`` removed, are converted to numbers and divided by 100.
        Values that cannot be parsed become 0. Rows without a
        ``New_deal_name`` are dropped.

        The input frame is not modified.

        Returns:
            A frame with columns ``New_deal_name``, ``ContractType``,
            ``Year``, ``Month`` and ``LWP_Percentage``; an empty frame when
            the LWP column or any required column is missing.
        """
        # rename() returns a new frame, so the caller's headers stay intact.
        df = df.rename(
            columns=lambda name: name.strip() if isinstance(name, str) else name
        )

        lwp_column = next(
            (
                col for col in df.columns
                if isinstance(col, str)
                and ('lwp' in col.lower() or 'dwp' in col.lower())
            ),
            None,
        )
        if lwp_column is None:
            print("ERROR: Could not find LWP_DWP_% column in CSV")
            return pd.DataFrame()

        print(f"Found LWP column: {lwp_column}")

        # Required columns for matching
        required_columns = ['New_deal_name', 'ContractType', 'Year', 'Month', lwp_column]
        missing_columns = [col for col in required_columns if col not in df.columns]
        if missing_columns:
            print(f"Missing required columns: {missing_columns}")
            return pd.DataFrame()

        df_clean = df[required_columns].copy()

        # Strip formatting characters, then convert percent -> fraction.
        raw_lwp = (
            df_clean[lwp_column]
            .astype(str)
            .str.replace('%', '', regex=False)
            .str.replace(',', '', regex=False)
            .str.strip()
        )
        df_clean[lwp_column] = pd.to_numeric(raw_lwp, errors='coerce').fillna(0) / 100

        df_clean = df_clean.rename(columns={lwp_column: 'LWP_Percentage'})
        df_clean = df_clean.dropna(subset=['New_deal_name'])

        print(f"Cleaned data: {len(df_clean)} rows")
        print(f"Sample LWP data (as decimals): {df_clean['LWP_Percentage'].head().tolist()}")

        return df_clean

    def group_lwp_data_by_contract(self, df: pd.DataFrame) -> Dict[str, Dict]:
        """Group LWP values by contract.

        Rows whose ``Year`` or ``Month`` cannot be converted to an integer
        are skipped (and counted in a printed warning) instead of aborting
        the whole run.

        Returns:
            Mapping of ``"<deal>_<type>"`` to a dict with ``contract_name``,
            ``deal_name``, ``contract_type`` and ``lwp_by_period`` (a
            ``"YYYY-MM"`` -> fraction mapping). When a contract has several
            rows for the same period, the last row wins.
        """
        lwp_data: Dict[str, Dict[str, Any]] = {}
        skipped_rows = 0

        for _, row in df.iterrows():
            try:
                year = int(row['Year'])
                month = int(row['Month'])
            except (TypeError, ValueError):
                skipped_rows += 1
                continue

            deal_name = row['New_deal_name']
            contract_type = row.get('ContractType', 'Energy')
            lwp_percentage = float(row['LWP_Percentage'])

            # Key and display name must stay in sync with the original
            # importer, otherwise contracts cannot be matched by name.
            contract_key = f"{deal_name}_{contract_type}"
            contract_name = f"{deal_name} ({contract_type})" if contract_type != 'Energy' else deal_name

            entry = lwp_data.setdefault(contract_key, {
                'contract_name': contract_name,
                'deal_name': deal_name,
                'contract_type': contract_type,
                'lwp_by_period': {},
            })
            entry['lwp_by_period'][f"{year}-{str(month).zfill(2)}"] = lwp_percentage

        if skipped_rows:
            print(f"Skipped {skipped_rows} rows with invalid Year/Month")
        print(f"Grouped LWP data for {len(lwp_data)} contracts")
        return lwp_data

    def fetch_existing_contracts(self) -> List[Dict]:
        """Fetch existing contracts from the API.

        Returns:
            The decoded JSON body of a 200 response; an empty list on any
            network error, non-200 status or undecodable body (the error is
            printed).
        """
        try:
            response = requests.get(self.api_base_url, timeout=self.timeout)
        except requests.RequestException as e:
            print(f"Error fetching contracts: {e}")
            return []

        if response.status_code != 200:
            print(f"Error fetching contracts: {response.status_code}")
            return []

        try:
            contracts = response.json()
        except ValueError as e:
            print(f"Error fetching contracts: {e}")
            return []

        print(f"Fetched {len(contracts)} existing contracts from database")
        return contracts

    def find_matching_contract(self, contract_name: str, existing_contracts: List[Dict]) -> Optional[Dict]:
        """Return the first contract whose ``name`` equals ``contract_name``.

        The comparison is case-insensitive. Contracts without a string
        ``name`` are ignored. Returns ``None`` when nothing matches.
        """
        wanted = contract_name.lower()
        for contract in existing_contracts:
            name = contract.get('name')
            if isinstance(name, str) and name.lower() == wanted:
                return contract
        return None

    @staticmethod
    def _point_volume(point: Dict) -> Any:
        """Return the point's volume, treating a missing or null value as 0."""
        volume = point.get('volume')
        return 0 if volume is None else volume

    @staticmethod
    def _weighted_average(total_lwp_value: float, total_volume: float) -> float:
        """Return the weighted average rounded to 4 places, or 0 without volume."""
        if total_volume > 0:
            return round(total_lwp_value / total_volume, 4)
        return 0.0

    def _point_year(self, point: Dict) -> int:
        """Determine the year a time series point belongs to.

        The ISO ``timestamp`` is preferred; if it is absent or unparsable the
        leading component of ``period`` is used; failing that,
        ``DEFAULT_YEAR``.
        """
        timestamp = point.get('timestamp')
        if isinstance(timestamp, str) and timestamp:
            # datetime.fromisoformat() only accepts a trailing "Z" from
            # Python 3.11 on; normalise it so results do not depend on the
            # interpreter version.
            iso_text = timestamp[:-1] + '+00:00' if timestamp.endswith(('Z', 'z')) else timestamp
            try:
                return datetime.fromisoformat(iso_text).year
            except ValueError:
                pass

        period = point.get('period') or ''
        year_text, separator, _ = str(period).partition('-')
        if separator:
            try:
                return int(year_text)
            except ValueError:
                pass
        return self.DEFAULT_YEAR

    def calculate_volume_weighted_lwp_average(self, time_series_data: List[Dict], lwp_by_period: Dict[str, float]) -> float:
        """Calculate the volume-weighted average LWP over all points.

        Points whose period has no entry in ``lwp_by_period`` contribute an
        LWP of 0 but still count with their volume.

        Returns:
            The average rounded to 4 decimal places, or 0 when the total
            volume is not positive.
        """
        total_lwp_value = 0
        total_volume = 0

        for point in time_series_data:
            volume = self._point_volume(point)
            total_lwp_value += lwp_by_period.get(point.get('period'), 0) * volume
            total_volume += volume

        return self._weighted_average(total_lwp_value, total_volume)

    def calculate_annual_lwp_averages(self, time_series_data: List[Dict], lwp_by_period: Dict[str, float]) -> Dict[int, float]:
        """Calculate volume-weighted LWP averages per year.

        Points are assigned to a year via their timestamp, falling back to
        their period and finally to ``DEFAULT_YEAR``. Weighting rules are
        the same as for the overall average.

        Returns:
            Mapping of year to the average rounded to 4 decimal places
            (0 for years without positive volume).
        """
        # year -> [sum of lwp * volume, sum of volume]
        totals_by_year: Dict[int, List[float]] = {}

        for point in time_series_data:
            year = self._point_year(point)
            volume = self._point_volume(point)
            lwp_percentage = lwp_by_period.get(point.get('period'), 0)

            totals = totals_by_year.setdefault(year, [0, 0])
            totals[0] += lwp_percentage * volume
            totals[1] += volume

        return {
            year: self._weighted_average(lwp_total, volume_total)
            for year, (lwp_total, volume_total) in totals_by_year.items()
        }

    def update_contract_with_lwp(self, contract_id: str, contract_name: str, lwp_by_period: Dict[str, float],
                                existing_time_series: List[Dict]) -> bool:
        """Send the LWP update for one contract.

        Each time series point gets an ``lwpPercentage`` field (0 when the
        period has no LWP value); overall and annual averages are sent
        alongside in a single PATCH with action ``updateLWP``.

        Returns:
            ``True`` when the API answered 200, ``False`` otherwise. Errors
            are printed, never raised.
        """
        try:
            updated_time_series = [
                {**point, 'lwpPercentage': lwp_by_period.get(point.get('period'), 0)}
                for point in existing_time_series
            ]

            overall_lwp = self.calculate_volume_weighted_lwp_average(existing_time_series, lwp_by_period)
            annual_lwp = self.calculate_annual_lwp_averages(existing_time_series, lwp_by_period)

            payload = {
                'action': 'updateLWP',
                'contractId': contract_id,
                'timeSeriesData': updated_time_series,
                'lwpPercentage': overall_lwp,
                'annualLwpPercentages': annual_lwp
            }

            response = requests.patch(self.api_base_url, json=payload, timeout=self.timeout)

            if response.status_code == 200:
                print(f"  ✓ Updated contract '{contract_name}' with LWP data")
                print(f"    Overall LWP: {overall_lwp:.1%}")  # Display as percentage for readability
                print(f"    Annual LWP: {annual_lwp}")
                return True

            print(f"  ✗ Error updating contract '{contract_name}': {response.status_code} - {response.text}")
            return False

        except Exception as e:
            # Deliberately broad: this is the per-contract boundary of a
            # batch run, one bad contract must not abort the others.
            print(f"  ✗ Error updating contract '{contract_name}': {e}")
            return False

    def process_lwp_updates(self, filepath: str, dry_run: bool = True) -> None:
        """Run the complete LWP update for every contract in the CSV.

        Args:
            filepath: Path of the CSV export.
            dry_run: When true (the default) nothing is sent to the API;
                the values that would be written are printed instead.
        """
        print(f"Starting LWP percentage update process (dry_run={dry_run})...")
        print("="*60)

        # Load and clean CSV data
        df = self.load_csv(filepath)
        if df is None:
            return

        df_clean = self.clean_csv_data(df)
        if df_clean.empty:
            print("No valid LWP data to process")
            return

        lwp_data = self.group_lwp_data_by_contract(df_clean)

        existing_contracts = self.fetch_existing_contracts()
        if not existing_contracts:
            print("No existing contracts found in database")
            return

        print("\nProcessing LWP updates...")
        print("-" * 60)

        updated_count = 0
        not_found_count = 0
        error_count = 0

        for contract_lwp_data in lwp_data.values():
            contract_name = contract_lwp_data['contract_name']
            lwp_by_period = contract_lwp_data['lwp_by_period']

            print(f"\nProcessing: {contract_name}")
            print(f"  LWP data points: {len(lwp_by_period)}")

            existing_contract = self.find_matching_contract(contract_name, existing_contracts)
            if not existing_contract:
                print("  ✗ Contract not found in database")
                not_found_count += 1
                continue

            print(f"  → Found in database: {existing_contract.get('_id', 'Unknown ID')}")

            existing_time_series = existing_contract.get('timeSeriesData', [])
            if not existing_time_series:
                print("  ⚠ Warning: No time series data found in existing contract")
                error_count += 1
                continue

            print(f"  → Existing time series points: {len(existing_time_series)}")

            if dry_run:
                # Show what would be written without touching the API.
                sample_lwp_values = list(lwp_by_period.values())[:5]
                print(f"  → Would add LWP data (sample values: {sample_lwp_values})")

                overall_lwp = self.calculate_volume_weighted_lwp_average(existing_time_series, lwp_by_period)
                annual_lwp = self.calculate_annual_lwp_averages(existing_time_series, lwp_by_period)

                print(f"  → Would set overall LWP: {overall_lwp:.1%}")  # Display as percentage for readability
                print(f"  → Would set annual LWP: {annual_lwp}")
                updated_count += 1
            elif self.update_contract_with_lwp(
                existing_contract['_id'],
                contract_name,
                lwp_by_period,
                existing_time_series,
            ):
                updated_count += 1
            else:
                error_count += 1

        # Summary
        print("\n" + "="*60)
        print(f"LWP UPDATE SUMMARY ({'DRY RUN' if dry_run else 'LIVE RUN'})")
        print("="*60)
        print(f"Contracts updated: {updated_count}")
        print(f"Contracts not found: {not_found_count}")
        print(f"Errors: {error_count}")
        print(f"Total in CSV: {len(lwp_data)}")

        if dry_run:
            print("\n→ This was a dry run. To execute the updates, run with dry_run=False")
        else:
            print("\n→ LWP update process completed!")

# Example usage
if __name__ == "__main__":
    updater = LWPUpdater("http://localhost:3000/api/contracts")

    # Run a dry run first: it only reports what would change and sends no
    # PATCH requests, so it is safe against a live database.
    print("Running LWP update dry run...")
    updater.process_lwp_updates("contracts.csv", dry_run=True)

    # Uncomment the lines below to execute the actual updates
    # print("\nRunning actual LWP updates...")
    # updater.process_lwp_updates("contracts.csv", dry_run=False)

Running LWP update dry run...
Starting LWP percentage update process (dry_run=True)...
Loaded CSV with 9230 rows and columns: ['Trade', 'Deal_name', 'New_deal_name', 'State', 'type', 'MonthStart', 'MonthEnd', 'StartDate', 'EndDate', 'Month', 'Year', 'annualVolume', 'AvestrikePrice', 'unit', 'ContractType', 'Sub_Type', 'Strike_Price_Timeseries', 'Unnamed: 17', 'Cap_Floor_Strike_price', ' Volume ', 'LWP_DWP_%', 'MtM']
Found LWP column: LWP_DWP_%
Cleaned data: 9230 rows
Sample LWP data (as decimals): [0.78, 0.97, 0.63, 0.86, 1.0]
Grouped LWP data for 416 contracts
Fetched 412 existing contracts from database
\nProcessing LWP updates...
------------------------------------------------------------
\nProcessing: BVC TOU
  LWP data points: 4
  → Found in database: 684796f9a9ed324d11af4b48
  → Existing time series points: 4
  → Would add LWP data (sample values: [0.78, 0.97, 0.63, 0.86])
  → Would set overall LWP: 77.4%
  → Would set annual LWP: {2025: 0.774}
\nProcessing: BVC TOU (Green)
  LW