# In [1]:
import math
import numpy as np
import pandas as pd
from IPython.display import display
import pandas as pd
import csv

def detect_csv_delimiter(csv_path):
    """
    Guess whether a CSV file is delimited by commas or by semicolons.

    Only the first line of the file is read (decoded as UTF-8). The commas
    and semicolons on that line are counted and the more frequent one wins.

    Parameters
    ----------
    csv_path : str
        Path to the CSV file to inspect.

    Returns
    -------
    str
        ',' when the first line holds strictly more commas than semicolons;
        ';' otherwise (this covers ties and lines containing neither).
    """
    with open(csv_path, 'r', encoding='utf-8') as handle:
        header_line = handle.readline().strip()

    n_commas = header_line.count(',')
    n_semicolons = header_line.count(';')

    # Semicolon is the fallback, so only a strict comma majority selects ','.
    return ',' if n_commas > n_semicolons else ';'

def load_and_label_pfas_csv(csv_path):
    """
    Load a PFAS CSV file, combine its two header lines, and return a labeled DataFrame.

    The delimiter (';' or ',') is detected with ``detect_csv_delimiter``.
    The first header line supplies the compound names: every field after the
    first one that contains 'Results' is used, with ' Results' removed. The
    second header line supplies the per-column field names. Compound-specific
    fields (RT, Final Conc., Calc. Conc., Surrogate % Recovery, Accuracy,
    ISTD Conc. Ratio, Area, RR, S/N) are renamed to ``<compound>_<field>``;
    all other fields keep their original name.

    Both header lines are parsed with the ``csv`` module so that quoted
    fields are split the same way ``pd.read_csv`` splits the data rows.

    Parameters
    ----------
    csv_path : str
        Path to the raw CSV file.

    Returns
    -------
    pd.DataFrame
        Data rows (everything after the two header lines) with the combined
        headers as column names. If the data has more columns than headers,
        the surplus columns are named ``Extra_Col_<position>``; if it has
        fewer, the surplus headers are dropped.
    """
    # Detect the delimiter
    delimiter = detect_csv_delimiter(csv_path)
    print(f"Detected delimiter: '{delimiter}'")

    # Read the two header lines, then split them quote-aware so the header
    # fields line up with the columns pandas produces for the data rows.
    with open(csv_path, 'r', encoding='utf-8') as f:
        header_lines = [f.readline().strip(), f.readline().strip()]
    first_line, second_line = [
        next(csv.reader([line], delimiter=delimiter), [])
        for line in header_lines
    ]

    # Extract PFAS compound names from the first line (the first field is skipped)
    pfas_compounds = [name.replace(' Results', '') for name in first_line[1:] if 'Results' in name]
    print(pfas_compounds)

    # Compounds whose name starts with one of these prefixes are counted as
    # isotope-labeled; this is used for the informational printout only.
    isotope_prefixes = ("13C", "D3-", "D5-", "D7-", "D9-", "18O")
    native_compounds = [c for c in pfas_compounds if not c.startswith(isotope_prefixes)]

    print(f"Total compounds: {len(pfas_compounds)}")
    print(f"Without isotope prefix: {len(native_compounds)}")

    compound_fields = {
        'RT', 'Final Conc.', 'Calc. Conc.', 'Surrogate % Recovery',
        'Accuracy', 'ISTD Conc. Ratio', 'Area', 'RR', 'S/N',
    }

    # Create combined headers
    combined_headers = []
    pfas_index = 0
    for header in second_line:
        if header in compound_fields and pfas_index < len(pfas_compounds):
            combined_headers.append(f"{pfas_compounds[pfas_index]}_{header}")
            # An 'Accuracy' field closes the current compound's group of
            # columns; the next compound field belongs to the next compound.
            if header == 'Accuracy':
                pfas_index += 1
        else:
            # Metadata columns, unknown columns, and compound fields left
            # over once every compound name has been used keep their name.
            combined_headers.append(header)

    # Read the actual data (skip the 2 header lines) using the detected delimiter
    rawdata = pd.read_csv(csv_path, delimiter=delimiter, skiprows=2, header=None)

    # Fit the header list to the number of data columns
    n_columns = len(rawdata.columns)
    if len(combined_headers) <= n_columns:
        combined_headers = combined_headers + [
            f"Extra_Col_{i}" for i in range(len(combined_headers), n_columns)
        ]
    else:
        combined_headers = combined_headers[:n_columns]
    rawdata.columns = combined_headers

    return rawdata

def check_calibration_sn_ratios(rawdata):
    """
    Check the S/N ratios of every compound in the calibration samples.

    Calibration samples are the rows whose 'Type' equals 'Cal'. S/N columns
    are the columns whose name ends with '_S/N'; the text before that suffix
    is taken as the compound name. A compound passes when none of its
    non-missing calibration S/N values is below 3.

    Parameters:
    rawdata: DataFrame with a 'Type' column and '<compound>_S/N' columns

    Returns:
    dict: either {'error': <message>} when there are no calibration rows or
          no S/N columns, or a summary with the keys 'total_compounds',
          'total_cal_samples', 'compounds_failing_sn3',
          'compounds_passing_sn3' and 'summary_stats'.
    """
    suffix = '_S/N'
    threshold = 3

    calibration_rows = rawdata.loc[rawdata['Type'] == 'Cal']
    if calibration_rows.empty:
        return {"error": "No calibration samples found in the data"}

    sn_columns = [name for name in rawdata.columns if name.endswith(suffix)]
    if not sn_columns:
        return {"error": "No S/N ratio columns found"}

    # Preview at most five column names, then summarise the remainder.
    print(f"Found {len(sn_columns)} S/N ratio columns for compounds:")
    for name in sn_columns[:5]:
        print(f"  - {name}")
    if len(sn_columns) > 5:
        print(f"  ... and {len(sn_columns) - 5} more")

    failing = []
    passing = []
    stats = {}

    for column in sn_columns:
        compound = column.replace(suffix, '')
        observed = calibration_rows[column].dropna()
        n_valid = len(observed)

        # A compound without any usable value is reported as failing.
        if n_valid == 0:
            failing.append({
                'compound': compound,
                'issue': 'No valid S/N values found',
                'min_sn': None,
                'samples_below_3': None
            })
            continue

        lowest = observed.min()
        n_below = (observed < threshold).sum()

        stats[compound] = {
            'min_sn': lowest,
            'mean_sn': observed.mean(),
            'samples_below_3': n_below,
            'total_samples': n_valid
        }

        if lowest < threshold:
            failing.append({
                'compound': compound,
                'min_sn': lowest,
                'samples_below_3': n_below,
                'total_samples': n_valid
            })
        else:
            passing.append(compound)

    return {
        'total_compounds': len(sn_columns),
        'total_cal_samples': len(calibration_rows),
        'compounds_failing_sn3': failing,
        'compounds_passing_sn3': passing,
        'summary_stats': stats
    }

def print_sn_check_results(results):
    """
    Print a human-readable report for the dict returned by the S/N check.

    When the dict carries an 'error' key only that message is printed.
    Otherwise the totals, the pass/fail counts and, if anything failed, one
    line per failing compound are printed between two banner lines.
    """
    if 'error' in results:
        print(f"Error: {results['error']}")
        return

    banner = "=" * 60

    print("\n" + banner)
    print("PFAS CALIBRATION S/N RATIO CHECK RESULTS")
    print(banner)

    print(f"Total compounds analyzed: {results['total_compounds']}")
    print(f"Total calibration samples: {results['total_cal_samples']}")

    n_passing = len(results['compounds_passing_sn3'])
    n_failing = len(results['compounds_failing_sn3'])

    print(f"\nCompounds with ALL S/N ratios ≥ 3: {n_passing}")
    print(f"Compounds with ANY S/N ratios < 3: {n_failing}")

    if n_failing:
        print(f"\n❌ ISSUE: {n_failing} compounds have S/N ratios < 3 in calibration samples")

        print("\nCompounds failing S/N ≥ 3 criterion:")
        print("-" * 40)
        for entry in results['compounds_failing_sn3']:
            # Entries with an 'issue' text have no numeric statistics to show.
            if entry.get('issue'):
                print(f"  {entry['compound']}: {entry['issue']}")
                continue
            print(f"  {entry['compound']}: Min S/N = {entry['min_sn']:.2f}, "
                  f"{entry['samples_below_3']}/{entry['total_samples']} samples < 3")
    else:
        print("\n✅ SUCCESS: All compounds in calibration samples have S/N ratios ≥ 3")

    print("\n" + banner)

    
def _compute_rse(expected, measured, p, compound_name):
    """Print the per-point table for one fit and return (rse, per-point details, summed error)."""
    n = len(expected)
    degrees_of_freedom = n - p
    sum_relative_squared_error = 0
    detailed_results = []

    print(f"\n{compound_name} (p={p})")
    print("=" * 60)
    print("Point\tExpected\tMeasured\tDifference\t(Diff)²/xi²\t(Diff)²/xi²/(n-p)")
    print("-" * 80)

    for i, (x_expected, x_measured) in enumerate(zip(expected, measured), start=1):
        diff = x_measured - x_expected
        nominator = (diff / x_expected) ** 2
        relative_squared_error = nominator / degrees_of_freedom
        sum_relative_squared_error += relative_squared_error

        detailed_results.append({
            'Point': f'CAL {i}',
            'Expected': x_expected,
            'Measured': x_measured,
            'Difference': diff,
            'Nominator': nominator,
            'Relative_Squared_Error_Term': relative_squared_error
        })

        print(f"CAL {i}\t{x_expected}\t\t{x_measured:.4f}\t\t{diff:.4f}\t\t"
              f"{nominator:.6f}\t{relative_squared_error:.6f}")

    rse = 100 * math.sqrt(sum_relative_squared_error)

    print("-" * 80)
    print(f"Sum of relative squared errors: {sum_relative_squared_error:.6f}")
    print(f"RSE = 100 * √({sum_relative_squared_error:.6f}) = {rse:.3f}%")

    return rse, detailed_results, sum_relative_squared_error


def calculate_rse(expected, measured, compound_name, p=2):
    """
    Calculate the relative standard error (RSE) of a set of calibration points.

    RSE = 100 * sqrt( sum( ((measured_i - expected_i) / expected_i)^2 ) / (n - p) )

    where n is the number of points and p the number of fit parameters.
    A per-point table and the final value are printed. When called with
    p == 2 and the RSE exceeds 20 %, the calculation is repeated with p = 3
    (reported as "quadratic"), provided there are more than 3 points so that
    n - p stays positive; otherwise the linear result is kept.

    Parameters
    ----------
    expected : sequence of float
        Expected (nominal) values, one per calibration point. Must be non-zero.
    measured : sequence of float
        Measured values, same length and order as ``expected``.
    compound_name : str
        Name used in the printed output and in the returned dict.
    p : int, default 2
        Number of fit parameters; 2 is reported as "linear", any other value
        as "quadratic".

    Returns
    -------
    dict
        Keys: 'compound_name', 'n_points', 'parameters' (the p finally used),
        'relationship_type', 'rse_percent', 'sum_relative_squared_error' and
        'detailed_results' (one dict per point).

    Raises
    ------
    ValueError
        If the inputs differ in length, if there are not more points than
        parameters (n <= p), or if any expected value is zero.
    """
    if len(expected) != len(measured):
        raise ValueError("Expected and measured arrays must have the same length")

    n = len(expected)
    # n - p is the divisor of every term; it must be strictly positive.
    if n <= p:
        raise ValueError(
            f"RSE requires more calibration points than parameters (n={n}, p={p})"
        )
    # Every term divides by the expected value.
    if any(value == 0 for value in expected):
        raise ValueError("Expected values must be non-zero to compute a relative error")

    rse, details, sum_rse = _compute_rse(expected, measured, p, compound_name)
    relationship_type = "linear" if p == 2 else "quadratic"

    if p == 2 and rse > 20:
        if n > 3:
            print("\nLinearity test failed (RSE > 20%), recalculating with p=3 (quadratic)...")
            p = 3
            rse, details, sum_rse = _compute_rse(expected, measured, p, compound_name)
            relationship_type = "quadratic"
        else:
            # With 3 points, p=3 would make n - p zero; keep the linear result.
            print("\nLinearity test failed (RSE > 20%), but p=3 (quadratic) needs more "
                  "than 3 calibration points; keeping the linear result.")

    return {
        'compound_name': compound_name,
        'n_points': n,
        'parameters': p,
        'relationship_type': relationship_type,
        'rse_percent': rse,
        'sum_relative_squared_error': sum_rse,
        'detailed_results': details
    }

def calculate_target_rse(df, expected_concentrations, p=2, level_to_exclude=None):
    """
    Calculate the RSE of every target analyte from the calibration rows of ``df``.

    Target analytes are the compounds that have a '<compound>_Conc_Calc'
    column in ``df`` and an entry in ``expected_concentrations``. For each one
    the values of the rows with Type == 'Cal' are paired, by row position,
    with the expected concentrations and passed to ``calculate_rse`` twice:
    once with all valid (non-NaN) points and once with the rows at
    ``level_to_exclude`` removed.

    For '4:2FTS', '6:2FTS' and '8:2FTS' only the first 6 calibration rows are
    used when more than 6 are present.

    Parameters
    ----------
    df : pd.DataFrame
        Data with 'Type', 'Level' and '<compound>_Conc_Calc' columns.
    expected_concentrations : dict
        Maps compound name to the sequence of expected concentrations, one
        per calibration row, in the row order of ``df``.
    p : int, default 2
        Number of fit parameters handed to ``calculate_rse``.
    level_to_exclude : optional
        Value of the 'Level' column to leave out in the second calculation.
        With the default ``None`` nothing is left out, so the second result
        repeats the first.

    Returns
    -------
    tuple of (dict, dict)
        ``(rse_results_target, rse_results_excluded)``, each mapping compound
        name to the dict returned by ``calculate_rse``. Both are empty when
        there are no calibration rows. Compounds that are skipped (length
        mismatch, no valid points, too few points for ``p``) are absent.
    """
    suffix = '_Conc_Calc'
    # Compounds restricted to the first 6 calibration rows.
    special_compounds = ('4:2FTS', '6:2FTS', '8:2FTS')

    target_analytes = []
    for col in df.columns:
        if isinstance(col, str) and col.endswith(suffix):
            compound_name = col[:-len(suffix)]
            if compound_name in expected_concentrations:
                target_analytes.append(compound_name)

    print(f"Identified target analytes: {target_analytes}")
    print(f"Number of identified target analytes: {len(target_analytes)}")

    cal_data = df[df['Type'] == 'Cal']
    if cal_data.empty:
        print("No CAL samples found in the data")
        # Same shape as the normal return value: two dicts.
        return {}, {}

    rse_results_target = {}
    rse_results_excluded = {}

    for compound in target_analytes:
        actual_concs = cal_data[f"{compound}{suffix}"].values
        expected_list = np.array(expected_concentrations[compound], dtype=float)
        level_list = cal_data["Level"].values

        # --- Handle special compounds (drop everything after the 6th row) ---
        if compound in special_compounds and len(actual_concs) > 6:
            actual_concs = actual_concs[:6]
            expected_list = expected_list[:6]
            level_list = level_list[:6]
            print(f"Using only first 6 calibration points for {compound}")

        # Points are paired by position, so both sides must have equal length.
        if len(expected_list) != len(actual_concs):
            print(f"Warning: {compound} has {len(actual_concs)} calibration points "
                  f"but {len(expected_list)} expected concentrations; skipping")
            continue

        # --- Normal RSE ---
        valid_mask = ~np.isnan(actual_concs)
        n_valid = int(np.sum(valid_mask))
        if n_valid == 0:
            continue
        # RSE divides by (n - p), so more points than parameters are required.
        if n_valid <= p:
            print(f"Warning: {compound} has only {n_valid} valid calibration points "
                  f"(p={p}); skipping")
            continue

        rse_results_target[compound] = calculate_rse(
            expected_list[valid_mask].tolist(),
            actual_concs[valid_mask].tolist(),
            compound,
            p,
        )

        # --- RSE without the excluded calibration level ---
        if level_to_exclude is None:
            keep_mask = valid_mask
        else:
            keep_mask = (level_list != level_to_exclude) & valid_mask
        n_keep = int(np.sum(keep_mask))

        if n_keep > p:
            rse_results_excluded[compound] = calculate_rse(
                expected_list[keep_mask].tolist(),
                actual_concs[keep_mask].tolist(),
                compound,
                p,
            )
        elif n_keep > 0:
            print(f"Warning: {compound} has only {n_keep} calibration points left "
                  f"after excluding level {level_to_exclude} (p={p}); skipping")

    return rse_results_target, rse_results_excluded



def print_rse_summary(rse_results):
    """
    Print an overview of the RSE results of all compounds.

    First the compounds whose RSE exceeds 20 % are listed, then one detail
    section per compound follows (RSE, number of calibration points,
    relationship type) with a closing remark where applicable.

    Parameters
    ----------
    rse_results : dict
        Maps compound name to a dict with the keys 'rse_percent',
        'n_points', 'relationship_type' and 'parameters'.
    """
    rule = "=" * 60
    limit = 20

    print("\n" + rule)
    print("COMPOUND RSE SUMMARY")
    print(rule)

    # Overview: every compound above the limit, in input order.
    over_limit = [name for name, entry in rse_results.items()
                  if entry['rse_percent'] > limit]

    print("Compounds that do not pass the linearity test (RSE > 20%):")
    for name in over_limit:
        print(f"  - {name}")

    print("\ndetailed output below")

    for name, entry in rse_results.items():
        print(f"\n{name}:")
        print(f"  RSE: {entry['rse_percent']:.3f}%")
        print(f"  Calibration Points: {entry['n_points']}")
        print(f"  Relationship type: {entry['relationship_type']} (p = {entry['parameters']})")

        is_quadratic = entry['relationship_type'] == "quadratic"
        exceeds = entry['rse_percent'] > limit

        if is_quadratic:
            if exceeds:
                print("  ⚠ WARNING: Even after quadratic fit (p=3), RSE > 20% — system fails.")
            else:
                print("  ✔ Quadratic fit passed RSE < 20%.")
        elif exceeds:
            print("  ⚠ Linear fit failed RSE > 20%.")

def df_rse_results(rse_results):
    """
    Convert RSE results into a tabular DataFrame (nothing is written to disk).

    Parameters
    ----------
    rse_results : dict
        Maps compound name to a result dict with the keys 'rse_percent',
        'n_points', 'relationship_type', 'parameters' and
        'sum_relative_squared_error'.

    Returns
    -------
    pd.DataFrame
        One row per compound with the columns Compound, RSE_Percent,
        Calibration_Points, Relationship_Type, Parameters, Pass_Fail and
        Sum_Relative_Squared_Error. Pass_Fail is "FAIL" when the RSE is
        above 20 and "PASS" otherwise.
    """
    rows = [
        {
            'Compound': name,
            'RSE_Percent': entry['rse_percent'],
            'Calibration_Points': entry['n_points'],
            'Relationship_Type': entry['relationship_type'],
            'Parameters': entry['parameters'],
            'Pass_Fail': "FAIL" if entry['rse_percent'] > 20 else "PASS",
            'Sum_Relative_Squared_Error': entry['sum_relative_squared_error'],
        }
        for name, entry in rse_results.items()
    ]
    return pd.DataFrame(rows)


def calculate_average_RFs_EIS(rawdata, EIS_NIS_analogs, calibration_solutions):
    """
    Calculate average response factors for EIS compounds across calibration levels.

    For every EIS compound that has a '<compound>_RR' column in ``rawdata``
    and an entry in ``EIS_NIS_analogs``, a response factor is computed for
    each of the levels L1..L7 that has calibration rows (Type == 'Cal'):

        response_factor = mean RR at that level * (mass_NIS / mass_EIS)

    Levels lacking data, masses or RR values are skipped with a warning.
    Compounds are processed in the order of their RR columns in ``rawdata``,
    so the output row order is reproducible.

    Parameters:
    rawdata: DataFrame with 'Type', 'Level' and '<compound>_RR' columns
    EIS_NIS_analogs: dict mapping each EIS compound to its NIS compound
    calibration_solutions: dict mapping compound name to a dict of
        {level: mass} for the levels 'L1'..'L7'

    Returns:
    DataFrame with the columns Compound_Name, NIS_Analog, Mass_NIS, Mass_EIS
    (both taken from level 'L4', NaN when that level is absent),
    Average_Response_Factor and RSD_% (np.std with its default settings
    divided by the mean, times 100; NaN when only one level contributed).
    The table is also shown with ``display``. An empty DataFrame is returned
    when no response factor could be calculated.
    """
    levels = ('L1', 'L2', 'L3', 'L4', 'L5', 'L6', 'L7')
    rep_level = 'L4'  # level whose masses are reported in the output table

    # Filter dataframe for CAL sample type
    cal_data = rawdata[rawdata['Type'] == 'Cal']

    # EIS compounds with an RR column, de-duplicated in column order
    # (dict.fromkeys keeps first-seen order, unlike a set).
    eis_compounds = list(dict.fromkeys(
        col.replace('_RR', '')
        for col in rawdata.columns
        if isinstance(col, str)
        and '_RR' in col
        and col.replace('_RR', '') in EIS_NIS_analogs
    ))

    rows = []

    for eis_compound in eis_compounds:
        nis_compound = EIS_NIS_analogs[eis_compound]
        rr_column = f"{eis_compound}_RR"

        if rr_column not in cal_data.columns:
            print(f"Warning: RR column {rr_column} not found in data")
            continue

        # One response factor per usable calibration level
        response_factors = []

        for level in levels:
            level_cal_data = cal_data[cal_data['Level'] == level]

            if level_cal_data.empty:
                print(f"Warning: No data found for level {level}")
                continue

            # Both compounds need an entry in calibration_solutions
            if nis_compound not in calibration_solutions:
                print(f"Warning: NIS compound {nis_compound} not found in calibration_solutions")
                continue
            if eis_compound not in calibration_solutions:
                print(f"Warning: EIS compound {eis_compound} not found in calibration_solutions")
                continue

            mass_nis = calibration_solutions[nis_compound].get(level, np.nan)
            mass_eis = calibration_solutions[eis_compound].get(level, np.nan)

            if pd.isna(mass_nis) or pd.isna(mass_eis):
                print(f"Warning: Missing mass data for {eis_compound} or {nis_compound} at level {level}")
                continue

            rr_values = level_cal_data[rr_column].dropna()

            if rr_values.empty:
                print(f"Warning: No RR values found for {eis_compound} at level {level}")
                continue

            if mass_eis == 0:
                print(f"Warning: Mass EIS is zero for {eis_compound} at level {level}")
                continue

            # The mean also covers the single-value case.
            compound_rr = rr_values.mean()
            response_factors.append(compound_rr * (mass_nis / mass_eis))

        if not response_factors:
            print(f"Warning: No valid response factors calculated for {eis_compound}")
            continue

        rows.append({
            'Compound_Name': eis_compound,
            'NIS_Analog': nis_compound,
            'Mass_NIS': calibration_solutions[nis_compound].get(rep_level, np.nan),
            'Mass_EIS': calibration_solutions[eis_compound].get(rep_level, np.nan),
            'Average_Response_Factor': np.mean(response_factors),
            'RSD_%': (np.std(response_factors) / np.mean(response_factors) * 100)
                     if len(response_factors) > 1 else np.nan
        })

    if not rows:
        print("No valid results calculated")
        return pd.DataFrame()

    output_df = pd.DataFrame(rows)

    # Round numerical columns
    output_df['Mass_NIS'] = output_df['Mass_NIS'].round(3)
    output_df['Mass_EIS'] = output_df['Mass_EIS'].round(3)
    output_df['Average_Response_Factor'] = output_df['Average_Response_Factor'].round(4)
    output_df['RSD_%'] = output_df['RSD_%'].round(2)

    # Display clean table in Jupyter
    print(f"\nAVERAGE RESPONSE FACTORS FOR EIS COMPOUNDS (n={len(output_df)})")
    display(output_df)

    return output_df

def calc_mass_added_NIS(NIS_stock, dilution_NIS_stock, spiked_amount):
    """
    Calculate the amount of NIS added for each compound.

    Each value is ``(stock concentration / dilution factor) * spiked_amount``.
    No unit conversion is applied.

    NOTE(review): the stock is described as ng/mL, so the result is in ng
    only if ``spiked_amount`` is given in mL -- confirm the unit callers use.

    Parameters
    ----------
    NIS_stock : dict
        Stock concentration per NIS compound (ng/mL).
    dilution_NIS_stock : float
        Dilution factor (e.g., 10 for 1:10 dilution).
    spiked_amount : float
        Spiked volume, in the volume unit of the stock concentration.

    Returns
    -------
    dict
        Amount added per NIS compound, keyed like ``NIS_stock``.
    """
    return {
        compound: (stock_conc / dilution_NIS_stock) * spiked_amount
        for compound, stock_conc in NIS_stock.items()
    }

def calc_mass_added_EIS(EIS_stock, dilution_EIS_stock, spiked_amount):
    """
    Calculate the amount of EIS added for each compound.

    Each value is ``(stock concentration / dilution factor) * spiked_amount``,
    i.e. ng for a stock in ng/mL and a spiked volume in mL.

    NOTE(review): no division by a sample volume takes place, so the result
    is an amount, not a concentration in ng/L -- confirm what callers expect.

    Parameters
    ----------
    EIS_stock : dict
        Stock concentration per EIS compound (ng/mL).
    dilution_EIS_stock : float
        Dilution factor (e.g., 10 for 1:10 dilution).
    spiked_amount : float
        Volume spiked [mL].

    Returns
    -------
    dict
        Amount added per EIS compound, keyed like ``EIS_stock``.
    """
    return {
        compound: (stock_conc / dilution_EIS_stock) * spiked_amount
        for compound, stock_conc in EIS_stock.items()
    }

        

def calculate_conc_EIS(rawdata, EIS_RFS, dilution_NIS_stock, EIS_NIS_analogs,
                       Df, NIS_stock, spiked_amount):
    """
    Calculate concentrations for EIS compounds in sample data.

    For every row with Type == 'Sample' that has a 'Ws' value, and for every
    EIS compound of ``EIS_NIS_analogs`` that has an RR column, a response
    factor and an NIS amount:

        conc = (RR * m_NIS / average_RF) * Df * (1 / Ws)

    where m_NIS comes from ``calc_mass_added_NIS``. Compounds are processed
    in the order of ``EIS_NIS_analogs``, so the output columns have a
    reproducible order.

    Parameters
    ----------
    rawdata : DataFrame
        Input data with 'Name', 'Type', 'Ws' and '<compound>_RR' columns.
    EIS_RFS : DataFrame
        Table with 'Compound_Name' and 'Average_Response_Factor' columns.
        A table lacking these columns (e.g. an empty DataFrame) is treated
        as containing no response factors.
    dilution_NIS_stock : float
        Dilution factor for NIS stock.
    EIS_NIS_analogs : dict
        Mapping from EIS compound -> NIS analog.
    Df : float
        Dilution factor for samples.
    NIS_stock : dict
        NIS stock concentrations (ng/mL).
    spiked_amount : float
        Spiked volume, passed unchanged to ``calc_mass_added_NIS``.

    Returns
    -------
    DataFrame or None
        'Name', 'Type', 'Ws' plus one '<compound>_Conc_Calc' column per
        calculated compound; the table is also shown with ``display``.
        None when the 'Ws' column is missing or no sample has a Ws value.
    """
    # Amount of NIS added, per NIS compound
    NIS_added = calc_mass_added_NIS(NIS_stock, dilution_NIS_stock, spiked_amount)

    # Only look at samples (not calibration rows)
    sample_data = rawdata[rawdata['Type'] == 'Sample']

    if 'Ws' not in sample_data.columns:
        print("Error: 'Ws' column not found in rawdata")
        return None

    # Report, then drop, samples without a Ws value
    missing_ws_mask = sample_data['Ws'].isna()
    samples_without_ws = sample_data.loc[missing_ws_mask, 'Name'].tolist()

    if samples_without_ws:
        print("Warning: The following samples do not have Ws values and will be skipped:")
        for sample in samples_without_ws:
            print(f"  - {sample}")

    sample_data_with_ws = sample_data[~missing_ws_mask]

    if sample_data_with_ws.empty:
        print("Error: No samples have Ws values")
        return None

    # Start output DataFrame with identifiers
    conc_data = sample_data_with_ws[['Name', 'Type', 'Ws']].copy()

    has_rf_columns = {"Compound_Name", "Average_Response_Factor"} <= set(EIS_RFS.columns)

    # Iterate the mapping itself (insertion order) rather than a set of its
    # keys, whose order can change from run to run.
    for eis_compound, nis_compound in EIS_NIS_analogs.items():
        rr_column = f"{eis_compound}_RR"

        if rr_column not in sample_data_with_ws.columns:
            print(f"Warning: RR column {rr_column} not found in data")
            continue

        # Average RF for this compound (first match wins)
        if has_rf_columns:
            rf_matches = EIS_RFS.loc[
                EIS_RFS["Compound_Name"] == eis_compound, "Average_Response_Factor"
            ]
        else:
            rf_matches = pd.Series(dtype=float)
        if rf_matches.empty:
            print(f"Warning: No RF found for {eis_compound}")
            continue
        avg_rf = rf_matches.values[0]

        # Amount of the NIS analog that was added
        if nis_compound not in NIS_added:
            print(f"Warning: No NIS mass found for analog {nis_compound}")
            continue
        m_nis = NIS_added[nis_compound]

        # Row-wise concentration using each sample's own Ws
        conc_values = (sample_data_with_ws[rr_column] * m_nis / avg_rf) * Df * (1 / sample_data_with_ws['Ws'])
        conc_data[f"{eis_compound}_Conc_Calc"] = conc_values

    print(f"\nCalculated concentrations for {len(conc_data)} samples with Ws values")
    display(conc_data)
    return conc_data


def calculate_recoveries(conc_data, expected_concs):
    """
    Calculate the recovery of each compound for every sample.

    Recovery = (calculated concentration / expected concentration) * 100

    Parameters
    ----------
    conc_data : DataFrame
        Table with 'Name', 'Type' and '<compound>_Conc_Calc' columns.
    expected_concs : dict
        Maps compound name to its expected concentration. Compounds without
        a matching '<compound>_Conc_Calc' column are skipped with a warning.

    Returns
    -------
    DataFrame
        'Name', 'Type' plus one '<compound>_Recovery_%' column per compound
        found. The table is also shown with ``display``.
    """
    # Identifier columns come first; recovery columns are appended after them.
    recovery_data = conc_data.loc[:, ['Name', 'Type']].copy()

    for compound in expected_concs:
        source_col = f"{compound}_Conc_Calc"

        if source_col in conc_data.columns:
            ratio = conc_data[source_col] / expected_concs[compound]
            recovery_data[f"{compound}_Recovery_%"] = ratio * 100
        else:
            print(f"Warning: {source_col} not found in calculated concentrations.")

    display(recovery_data)
    return recovery_data




def validate_recoveries(calculated_recoveries_df, expected_recoveries_df):
    """
    Compare calculated recoveries against their expected recovery ranges.

    Parameters:
    -----------
    calculated_recoveries_df : pd.DataFrame
        One row per sample; recovery columns are those whose name ends with
        '_Recovery_%'. 'Name' and 'Type' are used when present, otherwise
        'Sample_<index>' and 'Unknown' are substituted.
    expected_recoveries_df : pd.DataFrame
        Columns: 'compound', 'compound_type', 'lower_recovery_percent',
        'upper_recovery_percent'.

    Returns:
    --------
    pd.DataFrame
        One row per sample and recovery column with the columns Sample_Name,
        Sample_Type, Compound, Calculated_Recovery_%, Expected_Lower_%,
        Expected_Upper_%, Status and Compound_Type. Status is 'Pass' or
        'Fail' (limits inclusive), 'No Data' for a missing recovery, or
        'No Reference' when the compound has no expected range.
    """
    suffix = '_Recovery_%'

    # compound -> limits and type, for constant-time lookup
    recovery_ranges = {
        ref['compound']: {
            'lower': ref['lower_recovery_percent'],
            'upper': ref['upper_recovery_percent'],
            'type': ref['compound_type']
        }
        for _, ref in expected_recoveries_df.iterrows()
    }

    recovery_columns = [name for name in calculated_recoveries_df.columns
                        if name.endswith(suffix)]

    validation_results = []

    for idx, row in calculated_recoveries_df.iterrows():
        sample_name = row.get('Name', f'Sample_{idx}')
        sample_type = row.get('Type', 'Unknown')

        for column in recovery_columns:
            compound = column.replace(suffix, '')
            value = row[column]

            # Defaults cover the two cases without a usable reference range.
            lower_limit = np.nan
            upper_limit = np.nan
            compound_type = 'Unknown'

            if pd.isna(value):
                status = 'No Data'
            elif compound not in recovery_ranges:
                status = 'No Reference'
            else:
                limits = recovery_ranges[compound]
                lower_limit = limits['lower']
                upper_limit = limits['upper']
                compound_type = limits['type']
                in_range = lower_limit <= value <= upper_limit
                status = 'Pass' if in_range else 'Fail'

            validation_results.append({
                'Sample_Name': sample_name,
                'Sample_Type': sample_type,
                'Compound': compound,
                'Calculated_Recovery_%': value,
                'Expected_Lower_%': lower_limit,
                'Expected_Upper_%': upper_limit,
                'Status': status,
                'Compound_Type': compound_type
            })

    return pd.DataFrame(validation_results)

def _pass_fail_stats(frame):
    """Return pass/fail counts and the pass rate for the rows of ``frame``."""
    status = frame['Status']
    n_passed = int((status == 'Pass').sum())
    n_failed = int((status == 'Fail').sum())
    n_judged = n_passed + n_failed
    return {
        'Passed': n_passed,
        'Failed': n_failed,
        # Only judged rows (Pass + Fail) enter the rate; 0 when nothing was judged
        'Pass_Rate_%': (n_passed / n_judged * 100) if n_judged > 0 else 0
    }


def summarize_validation_results(validation_df):
    """
    Create a summary of validation results.

    The 'Status' column is tallied overall ('Pass', 'Fail', 'No Data',
    'No Reference'), and the 'Pass' / 'Fail' values are tallied again per
    compound type and per sample. Every pass rate is computed over judged
    measurements only (Pass + Fail) and is 0 when there are none.

    Parameters:
    -----------
    validation_df : pd.DataFrame
        Output from validate_recoveries function. Must provide the columns
        'Status' and 'Sample_Name'; 'Compound_Type' is optional. A frame
        without any columns (what ``pd.DataFrame([])`` produces when nothing
        was validated) is accepted and yields an all-zero summary.

    Returns:
    --------
    dict
        Summary statistics of validation results with the keys 'Overall',
        'By_Compound_Type' (only when that column exists, or when the frame
        has no columns at all) and 'By_Sample'.
    """
    # A column-less frame has no 'Status' column to index; report "nothing
    # measured" instead of failing with a KeyError.
    if len(validation_df.columns) == 0:
        return {
            'Overall': {
                'Total_Measurements': 0,
                'Passed': 0,
                'Failed': 0,
                'No_Data': 0,
                'No_Reference': 0,
                'Pass_Rate_%': 0
            },
            'By_Compound_Type': {},
            'By_Sample': {}
        }

    status = validation_df['Status']
    overall = _pass_fail_stats(validation_df)

    summary = {
        'Overall': {
            'Total_Measurements': len(validation_df),
            'Passed': overall['Passed'],
            'Failed': overall['Failed'],
            'No_Data': int((status == 'No Data').sum()),
            'No_Reference': int((status == 'No Reference').sum()),
            'Pass_Rate_%': overall['Pass_Rate_%']
        }
    }

    # By compound type
    if 'Compound_Type' in validation_df.columns:
        type_column = validation_df['Compound_Type']
        summary['By_Compound_Type'] = {
            comp_type: _pass_fail_stats(validation_df[type_column == comp_type])
            for comp_type in type_column.unique()
        }

    # By sample
    sample_column = validation_df['Sample_Name']
    summary['By_Sample'] = {
        sample: _pass_fail_stats(validation_df[sample_column == sample])
        for sample in sample_column.unique()
    }

    return summary

def mean_recoveries(calculated_recoveries_df, expected_recoveries_df):
    """
    Summarise recoveries across samples, one output row per compound.

    For every column named '<compound>_Recovery_%' the non-missing values are
    averaged and their relative standard deviation (sample standard
    deviation, ddof=1, divided by the mean, in percent) is computed. Both
    numbers are then compared with the reference row for that compound.

    Parameters
    ----------
    calculated_recoveries_df : pd.DataFrame
        DataFrame containing calculated recoveries with columns like 'compound_Recovery_%'
    expected_recoveries_df : pd.DataFrame
        DataFrame with columns: 'compound', 'compound_type',
        'lower_recovery_percent', 'upper_recovery_percent', 'rsd_percent'
        ('rsd_percent' may be absent; the expected RSD is then NaN).

    Returns
    -------
    pd.DataFrame
        Summary results with mean recovery, observed RSD, expected RSD,
        and pass/fail flags. Columns with no values at all are left out.
        Compounds without a reference row get "No Reference" for both flags;
        an RSD that cannot be compared (either side NaN) is flagged "Fail".
    """
    suffix = '_Recovery_%'
    rows = []

    for column in calculated_recoveries_df.columns:
        if not column.endswith(suffix):
            continue

        compound = column.replace(suffix, '')
        observed = calculated_recoveries_df[column].dropna()
        if observed.empty:
            continue

        mean_recovery = observed.mean()
        if mean_recovery != 0:
            observed_rsd = observed.std(ddof=1) / mean_recovery * 100
        else:
            # RSD is undefined for a zero mean
            observed_rsd = np.nan

        # First reference row whose 'compound' equals this compound
        reference = expected_recoveries_df[expected_recoveries_df['compound'] == compound]

        if reference.empty:
            lower = upper = expected_rsd = np.nan
            compound_type = "Unknown"
            recovery_flag = rsd_flag = "No Reference"
        else:
            lower = reference['lower_recovery_percent'].values[0]
            upper = reference['upper_recovery_percent'].values[0]
            if 'rsd_percent' in reference.columns:
                expected_rsd = reference['rsd_percent'].values[0]
            else:
                expected_rsd = np.nan
            compound_type = reference['compound_type'].values[0]

            if lower <= mean_recovery <= upper:
                recovery_flag = "Pass"
            else:
                recovery_flag = "Fail"

            rsd_comparable = not (np.isnan(expected_rsd) or np.isnan(observed_rsd))
            if rsd_comparable and observed_rsd <= expected_rsd:
                rsd_flag = "Pass"
            else:
                rsd_flag = "Fail"

        rows.append({
            "Compound": compound,
            "Compound_Type": compound_type,
            "Mean_Recovery_%": mean_recovery,
            "Expected_Lower_%": lower,
            "Expected_Upper_%": upper,
            "Recovery_Status": recovery_flag,
            "Observed_RSD_%": observed_rsd,
            "Expected_RSD_%": expected_rsd,
            "RSD_Status": rsd_flag
        })

    return pd.DataFrame(rows)



def get_failed_recoveries(validation_df):
    """
    Get only the failed recovery measurements for detailed review.

    The selected rows are also rendered with IPython's ``display``.

    Parameters:
    -----------
    validation_df : pd.DataFrame
        Output from validate_recoveries function. Rows are selected where the
        'Status' column equals 'Fail'. A frame without any columns (what
        ``pd.DataFrame([])`` produces when nothing was validated) is accepted
        and yields an empty result.

    Returns:
    --------
    pd.DataFrame
        Only the failed measurements (an independent copy).
    """
    if len(validation_df.columns) == 0:
        # No columns means no 'Status' to filter on; indexing it would raise
        # a KeyError even though "no failures" is the correct answer.
        failed_df = validation_df.iloc[0:0].copy()
    else:
        failed_df = validation_df[validation_df['Status'] == 'Fail'].copy()

    display(failed_df)

    return failed_df


def _pair_targets_with_eis(expected_concs_EIS, calibration_solutions, target_EIS_analogs=None):
    """Map each calibrated target compound to the EIS compound it is ratioed against."""
    isotope_prefixes = ('D7-', 'D9-', '13C4-', '13C3-', '13C5-', '13C2-', 'D3-', 'D5-',
                        '13C8-', '13C9-', '13C6-', '13C7-')
    pairs = {}

    # Direct isotope analogs: stripping the label prefix gives the native name
    for eis_compound in expected_concs_EIS.keys():
        for prefix in isotope_prefixes:
            if eis_compound.startswith(prefix):
                target_compound = eis_compound[len(prefix):]
                if target_compound and target_compound in calibration_solutions:
                    pairs[target_compound] = eis_compound
                break

    # Explicit analog assignments never override a direct isotope match
    if target_EIS_analogs:
        for target, eis in target_EIS_analogs.items():
            if target in pairs:
                continue
            if target in calibration_solutions and eis in expected_concs_EIS:
                pairs[target] = eis

    return pairs


def _l7_is_usable(cal_data, target_area_col, eis_area_col, target_masses, eis_masses):
    """Return True when level L7 has rows, both masses and at least one area per compound."""
    l7_rows = cal_data[cal_data['Level'] == 'L7']
    if l7_rows.empty:
        return False

    mass_target_l7 = target_masses.get('L7', np.nan)
    mass_eis_l7 = eis_masses.get('L7', np.nan)
    if pd.isna(mass_target_l7) or pd.isna(mass_eis_l7):
        return False

    has_target_area = not l7_rows[target_area_col].dropna().empty
    has_eis_area = not l7_rows[eis_area_col].dropna().empty
    return has_target_area and has_eis_area


def _response_ratios_by_level(cal_data, target_compound, eis_compound,
                              target_masses, eis_masses, levels):
    """Compute one response ratio per usable level, printing a warning for each skipped level."""
    target_area_col = f"{target_compound}_Area"
    eis_area_col = f"{eis_compound}_Area"
    level_data = []

    for level in levels:
        level_cal_data = cal_data[cal_data['Level'] == level]
        if level_cal_data.empty:
            print(f"Warning: No data found for level {level}")
            continue

        mass_target = target_masses.get(level, np.nan)
        mass_eis = eis_masses.get(level, np.nan)
        if pd.isna(mass_target) or pd.isna(mass_eis):
            print(f"Warning: Missing mass data for {target_compound} or {eis_compound} at level {level}")
            continue

        target_areas = level_cal_data[target_area_col].dropna()
        eis_areas = level_cal_data[eis_area_col].dropna()
        if target_areas.empty or eis_areas.empty:
            print(f"Warning: No area values found for {target_compound} or {eis_compound} at level {level}")
            continue

        # Replicate injections of one level are averaged before the ratio is formed
        area_target = target_areas.iloc[0] if len(target_areas) == 1 else target_areas.mean()
        area_eis = eis_areas.iloc[0] if len(eis_areas) == 1 else eis_areas.mean()

        if area_eis != 0 and mass_target != 0:
            level_data.append({
                'Level': level,
                'Area_Target': area_target,
                'Area_EIS': area_eis,
                'Mass_Target': mass_target,
                'Mass_EIS': mass_eis,
                'Response_Ratio': (area_target * mass_eis) / (area_eis * mass_target)
            })
        else:
            print(f"Warning: Zero area or mass values for {target_compound}/{eis_compound} at level {level}")

    return level_data


def calculate_average_RRs_targets(rawdata, expected_concs_EIS, calibration_solutions, target_EIS_analogs=None):
    """
    Calculate average response ratios for target compounds using either:
    1. Direct isotope analogs (prefix-based, e.g., 13C8-PFOS -> PFOS), or
    2. EIS compounds assigned through the target_EIS_analogs dictionary.

    Formula: RR = Area_target * Mass_IS / (Area_IS * Mass_target)

    One RR is computed per calibration level (L1-L6, plus L7 when that level
    has rows, masses and areas for the pair) and the RRs are averaged. The
    relative standard deviation of the RRs uses the sample standard deviation
    (ddof=1), the same convention as mean_recoveries; it is NaN when fewer
    than two levels are usable or the mean RR is zero.

    Parameters
    ----------
    rawdata : pd.DataFrame
        Needs the columns 'Type' (rows equal to 'Cal' are used), 'Level'
        ('L1'...'L7') and '<compound>_Area' for every target and EIS.
    expected_concs_EIS : dict-like
        Its keys are the EIS compound names considered for pairing.
    calibration_solutions : dict
        compound -> {level: mass}. Needed for both members of a pair.
    target_EIS_analogs : dict, optional
        target -> EIS assignments for targets without a direct isotope analog.
        A direct isotope match takes precedence.

    Returns
    -------
    pd.DataFrame
        One row per target with the columns 'Target_Compound', 'EIS_Isotope',
        'Mass_Target (L4)', 'Mass_EIS', 'Average_Response_Ratio', 'RSD_%',
        'N_Levels' and 'Levels_Used'. An empty, column-less DataFrame when no
        response ratio could be calculated. The pair list, warnings and the
        result table are also printed / displayed.
    """
    target_EIS_pairs = _pair_targets_with_eis(expected_concs_EIS, calibration_solutions,
                                              target_EIS_analogs)

    print(f"Found {len(target_EIS_pairs)} total target-IS pairs:")
    for target, eis in target_EIS_pairs.items():
        print(f"  {target} -> {eis}")
    print()

    base_levels = ['L1', 'L2', 'L3', 'L4', 'L5', 'L6', 'L7']
    rep_level = 'L4'  # level whose masses are reported in the output table

    cal_data = rawdata[rawdata['Type'] == 'Cal'].copy()
    rows = []

    for target_compound, eis_compound in target_EIS_pairs.items():
        target_area_col = f"{target_compound}_Area"
        eis_area_col = f"{eis_compound}_Area"

        if target_area_col not in cal_data.columns:
            print(f"Warning: Area column {target_area_col} not found in data")
            continue
        if eis_area_col not in cal_data.columns:
            print(f"Warning: Area column {eis_area_col} not found in data")
            continue

        # Pairing already guarantees the target is in calibration_solutions;
        # only the EIS side still has to be checked.
        if eis_compound not in calibration_solutions:
            print(f"Warning: EIS compound {eis_compound} not found in calibration_solutions")
            continue

        target_masses = calibration_solutions[target_compound]
        eis_masses = calibration_solutions[eis_compound]

        # L7 is optional: leave it out quietly instead of warning about it
        if _l7_is_usable(cal_data, target_area_col, eis_area_col, target_masses, eis_masses):
            levels_to_use = base_levels
        else:
            levels_to_use = base_levels[:-1]

        level_data = _response_ratios_by_level(cal_data, target_compound, eis_compound,
                                               target_masses, eis_masses, levels_to_use)
        if not level_data:
            print(f"Warning: No valid response ratios calculated for {target_compound}")
            continue

        response_ratios = [entry['Response_Ratio'] for entry in level_data]
        avg_response_ratio = np.mean(response_ratios)

        if len(response_ratios) > 1 and avg_response_ratio != 0:
            # Sample standard deviation (n - 1): the levels are a sample of the
            # calibration range, and ddof=0 would understate the RSD.
            rsd = np.std(response_ratios, ddof=1) / avg_response_ratio * 100
        else:
            rsd = np.nan

        rows.append({
            'Target_Compound': target_compound,
            'EIS_Isotope': eis_compound,
            'Mass_Target (L4)': target_masses.get(rep_level, np.nan),
            'Mass_EIS': eis_masses.get(rep_level, np.nan),
            'Average_Response_Ratio': avg_response_ratio,
            'RSD_%': rsd,
            'N_Levels': len(response_ratios),
            'Levels_Used': ', '.join(entry['Level'] for entry in level_data)
        })

    if not rows:
        print("No valid results calculated")
        return pd.DataFrame()

    output_df = pd.DataFrame(rows)
    output_df['Mass_Target (L4)'] = output_df['Mass_Target (L4)'].round(3)
    output_df['Mass_EIS'] = output_df['Mass_EIS'].round(3)
    output_df['Average_Response_Ratio'] = output_df['Average_Response_Ratio'].round(4)
    output_df['RSD_%'] = output_df['RSD_%'].round(2)

    from IPython.display import display
    print(f"\nAVERAGE RESPONSE RATIOS FOR TARGET COMPOUNDS WITH ISOTOPES (n={len(output_df)})")
    display(output_df)
    return output_df

def calculate_conc_targets(rawdata,
                           RF_target_analytes,
                           EIS_stock, dilution_EIS_stock, spiked_amount, Ws,
                           Df, sampletype,
                           debug_compound=None):
    """
    Calculate concentrations for target analytes using EIS internal standards.

    Formula:
        Conc_target = (Area_target * M_EIS) / (Area_EIS * RF_target) * Df * (1 / Ws)

    Parameters
    ----------
    rawdata : pd.DataFrame
        Needs the columns 'Name', 'Type', 'Level' and '<compound>_Area' for
        each target and its EIS. Only rows whose 'Type' equals `sampletype`
        are processed.
    RF_target_analytes : pd.DataFrame
        One row per target with the columns 'Target_Compound', 'EIS_Isotope'
        and 'Average_Response_Ratio' (used as RF_target).
    EIS_stock, dilution_EIS_stock, spiked_amount
        Passed unchanged to calc_mass_added_EIS, whose result is looked up by
        EIS compound name to obtain M_EIS.
    Ws, Df
        Scalars used as written in the formula above.
    sampletype
        Value of 'Type' selecting the rows to quantify.
    debug_compound : optional
        When it equals a target name, the inputs and results for that target
        are printed.

    Returns
    -------
    pd.DataFrame
        'Name', 'Type', 'Level' plus one '<target>_Conc_Calc' column per
        target that could be calculated. Targets with a missing area column
        or without a spiked EIS mass are skipped with a printed warning. The
        table is also shown with ``display``.
    """
    # Spiked mass of every EIS
    spiked_eis_masses = calc_mass_added_EIS(EIS_stock, dilution_EIS_stock, spiked_amount)

    # Restrict to the requested sample type and seed the output table
    samples = rawdata[rawdata['Type'] == sampletype].copy()
    conc_data = samples[['Name', 'Type', 'Level']].copy()

    for _, rf_row in RF_target_analytes.iterrows():
        target = rf_row['Target_Compound']
        eis_compound = rf_row['EIS_Isotope']
        rf_target = rf_row['Average_Response_Ratio']

        target_area_col = f"{target}_Area"
        eis_area_col = f"{eis_compound}_Area"

        has_both_areas = target_area_col in samples.columns and eis_area_col in samples.columns
        if not has_both_areas:
            print(f"Warning: Missing area column(s) for {target} or {eis_compound}")
            continue

        if eis_compound not in spiked_eis_masses:
            print(f"Warning: No spiked mass found for EIS {eis_compound}")
            continue
        m_eis = spiked_eis_masses[eis_compound]

        # Same operation order as the formula in the docstring
        numerator = samples[target_area_col] * m_eis
        denominator = samples[eis_area_col] * rf_target
        conc_values = numerator / denominator * Df * (1 / Ws)

        conc_data[f"{target}_Conc_Calc"] = conc_values

        wants_debug = debug_compound is not None and target == debug_compound
        if wants_debug:
            debug_lines = [
                f"DEBUG for {target}:",
                f"  Target area: {samples[target_area_col].values}",
                f"  EIS area: {samples[eis_area_col].values}",
                f"  Mass of EIS: {m_eis}",
                f"  Response factor (RF): {rf_target}",
                f"  Concentrations: {conc_values.values}",
            ]
            for line in debug_lines:
                print(line)

    display(conc_data)

    return conc_data

def _eis_conc_table(data, label, EIS_RFS, EIS_NIS_analogs, NIS_added, Df_cal, Ws_cal):
    """Build the '<EIS>_Conc_Calc' table for one sample type; None when ``data`` has no rows."""
    if data.empty:
        return None

    conc = data[['Name', 'Type', 'Level']].copy()

    # Iterate the mapping itself (insertion order) so the output column order
    # is the same on every run; a set of strings has no stable order.
    for eis_compound, nis_compound in EIS_NIS_analogs.items():
        rr_column = f"{eis_compound}_RR"
        if rr_column not in data.columns:
            print(f"Warning: RR column {rr_column} not found in {label} data")
            continue

        avg_rf_row = EIS_RFS[EIS_RFS["Compound_Name"] == eis_compound]
        if avg_rf_row.empty:
            print(f"Warning: No RF found for {eis_compound}")
            continue
        avg_rf = avg_rf_row["Average_Response_Factor"].values[0]

        if nis_compound not in NIS_added:
            print(f"Warning: No NIS mass found for analog {nis_compound}")
            continue
        m_nis = NIS_added[nis_compound]

        conc[f"{eis_compound}_Conc_Calc"] = (data[rr_column] * m_nis / avg_rf) * Df_cal * (1 / Ws_cal)

    return conc


def calculate_conc_EIS_cal(rawdata, EIS_RFS, dilution_NIS_stock_cal, Ws_cal,
                           EIS_NIS_analogs, Df_cal, NIS_stock, spiked_amount_cal):
    """
    Calculate concentrations for EIS compounds in calibration and QC data.

    For every EIS compound in `EIS_NIS_analogs`:

        Conc_EIS = (RR_EIS * M_NIS / RF_EIS) * Df_cal * (1 / Ws_cal)

    where RR_EIS is the '<EIS>_RR' column of `rawdata`, M_NIS the added mass
    of the paired NIS and RF_EIS the average response factor of the EIS.
    Calibration and QC rows go through the same calculation. Compounds with a
    missing RR column, response factor or NIS mass are skipped with a printed
    warning.

    Parameters
    ----------
    rawdata : DataFrame
        Input data with RR values and sample info (Type == "Cal" or "QC").
        Needs the columns 'Name', 'Type' and 'Level'.
    EIS_RFS : DataFrame
        Contains Average_Response_Factor per compound (matched on the
        'Compound_Name' column).
    dilution_NIS_stock_cal : float
        Dilution factor for NIS stock.
    Ws_cal : float
        Calibration volume [L].
    EIS_NIS_analogs : dict
        Mapping from EIS compound -> NIS analog. Output columns follow the
        order of this mapping.
    Df_cal : float
        Dilution factor for calibration samples.
    NIS_stock : dict
        NIS stock concentrations (ng/mL).
    spiked_amount_cal : float
        Spiked amount for calibration [ng].

    Returns
    -------
    tuple of DataFrames
        (conc_cal, conc_QC) - Separate DataFrames for calibration and QC data.
        An entry is None when `rawdata` has no rows of that type, so
        (None, None) is returned if no data is found for either type.
        Both tables are also shown with ``display``.
    """
    # Compute the amount of added NIS per analog
    NIS_added = calc_mass_added_NIS(NIS_stock, dilution_NIS_stock_cal, spiked_amount_cal)

    cal_data = rawdata[rawdata['Type'] == 'Cal'].copy()
    qc_data = rawdata[rawdata['Type'] == 'QC'].copy()

    conc_cal = _eis_conc_table(cal_data, 'Cal', EIS_RFS, EIS_NIS_analogs,
                               NIS_added, Df_cal, Ws_cal)
    conc_QC = _eis_conc_table(qc_data, 'QC', EIS_RFS, EIS_NIS_analogs,
                              NIS_added, Df_cal, Ws_cal)

    # Display results
    if conc_cal is not None:
        print("Calibration Data:")
        display(conc_cal)

    if conc_QC is not None:
        print("QC Data:")
        display(conc_QC)

    return conc_cal, conc_QC


def calculate_eis_rse_modified(df, expected_concentrations, p=2, exclude_levels=None):
    """
    Calculate RSE for EIS compounds using the new dataframe structure.

    EIS compounds are the columns whose name ends with '_Conc_Calc' and whose
    remaining name starts with an isotope label. For each of them the
    non-missing calculated concentrations of the 'Cal' rows are handed to
    calculate_rse together with the (repeated) expected concentration.

    Parameters:
    df: DataFrame with a 'Type' column, a 'Level' column and columns ending in '_Conc_Calc'
    expected_concentrations: dict with expected concentrations for each compound
    p: power parameter for RSE calculation (default=2), passed on to calculate_rse
    exclude_levels: list of calibration levels to exclude (e.g., ['L1', 'L7'] or ['L3']);
        a single string is treated as a one-element list

    Returns:
    dict: RSE results for each EIS compound (whatever calculate_rse returns,
        keyed by compound name). Empty when there are no 'Cal' rows or none
        remain after excluding levels. Compounds without an expected
        concentration or without any valid value are skipped with a warning.
    """
    # Handle exclude_levels parameter
    if exclude_levels is None:
        exclude_levels = []
    elif isinstance(exclude_levels, str):
        exclude_levels = [exclude_levels]  # Convert single string to list

    suffix = '_Conc_Calc'
    # NOTE(review): the bare 'D' prefix matches every name that starts with
    # "D", not only deuterium labels such as "D3-" - confirm this is intended.
    isotope_labels = ('13C', '18O', '15N', '2H', 'D')

    # Only columns that really END with the suffix are concentration columns;
    # cutting the suffix off the end keeps the compound name intact.
    eis_compounds = []
    for col in df.columns:
        if not (isinstance(col, str) and col.endswith(suffix)):
            continue
        compound_name = col[:-len(suffix)]
        if compound_name.startswith(isotope_labels):
            eis_compounds.append(compound_name)

    # Check if EIS compounds list length matches expected concentrations
    eis_in_expected = [compound for compound in eis_compounds if compound in expected_concentrations]
    print(f"EIS compounds found: {len(eis_compounds)}")
    print(f"EIS compounds with expected concentrations: {len(eis_in_expected)}")

    if len(eis_compounds) != len(eis_in_expected):
        missing_compounds = [compound for compound in eis_compounds if compound not in expected_concentrations]
        print(f"Warning: Missing expected concentrations for EIS compounds: {missing_compounds}")

    # Filter for Cal samples
    cal_data = df[df['Type'] == 'Cal'].copy()
    if cal_data.empty:
        print("No CAL samples found in the data")
        return {}

    # Exclude specified calibration levels
    if exclude_levels:
        print(f"Excluding calibration levels: {exclude_levels}")
        original_count = len(cal_data)
        cal_data = cal_data[~cal_data['Level'].isin(exclude_levels)]
        excluded_count = original_count - len(cal_data)
        print(f"Excluded {excluded_count} samples from levels {exclude_levels}")

        if cal_data.empty:
            print("Warning: No CAL samples remaining after excluding specified levels")
            return {}

    print(f"Number of Cal samples used for analysis: {len(cal_data)}")

    # Show which levels are being used
    if 'Level' in cal_data.columns:
        # Missing level labels cannot be ordered against strings, so they are
        # left out of this informational listing.
        used_levels = sorted(cal_data['Level'].dropna().unique())
        print(f"Calibration levels used: {used_levels}")

    rse_results_eis = {}
    for compound in eis_compounds:
        if compound not in expected_concentrations:
            print(f"Warning: No expected concentrations provided for {compound}")
            continue

        expected_conc = expected_concentrations[compound]

        # dropna also copes with object-dtype columns (e.g. all None), which
        # np.isnan rejects with a TypeError.
        actual_concs_clean = cal_data[f"{compound}{suffix}"].dropna()

        if len(actual_concs_clean) == 0:
            print(f"Warning: No valid data points for {compound}")
            continue

        # Create list of expected concentrations (same value repeated)
        expected_concs_list = [expected_conc] * len(actual_concs_clean)

        # Calculate RSE
        rse_result = calculate_rse(expected_concs_list, actual_concs_clean.tolist(), compound, p)
        rse_results_eis[compound] = rse_result

        # Print compound-specific info if excluding levels
        if exclude_levels:
            print(f"{compound}: Using {len(actual_concs_clean)} data points for RSE calculation")

    return rse_results_eis

def filter_expected_concs_by_level(expected_concs_dict, level):
    """
    Filter expected concentrations dictionary to only contain specified level.

    Parameters
    ----------
    expected_concs_dict : dict
        compound -> sequence of expected concentrations, one per level.
    level : int
        1-based level number (1 selects the first entry of each sequence).

    Returns
    -------
    dict
        compound -> one-element list holding the concentration of `level`.

    Raises
    ------
    ValueError
        If `level` is smaller than 1. Without this check, level 0 or a
        negative level would silently pick an entry counted from the END of
        each sequence.
    IndexError
        If `level` is larger than the number of entries of a compound.
    """
    if level < 1:
        raise ValueError(f"level must be a 1-based level number (>= 1), got {level}")

    index = level - 1
    return {compound: [conc_list[index]] for compound, conc_list in expected_concs_dict.items()}
