# Converting .abf to NWB
- see `clampfit_abf_standardization_nwb.pptx`
- see SharePoint: `Documents/data/data_standardization/pclamp_clampfit_icephys`
- see `ephys_nwb_params.xlsx`: this file is maintained by the experimenter
- **BE SURE TO SET**
    - **EXCEL FILE NAME** (`ephys_nwb_params.xlsx` in this example)
    - **ABF DIRECTORY** (`abf_data` in this example)

In [None]:
import pandas as pd
from pathlib import Path
from neuroconv.datainterfaces import AbfInterface
from datetime import datetime

# Path to Excel and ABF files
excel_path = Path("./ephys_nwb_params.xlsx")
ECEPHY_DATA_PATH = Path("./abf_data")
output_folder = Path("./nwb_files")
output_folder.mkdir(parents=True, exist_ok=True)

# Setup logging
# Take the timestamp once so the log file and the error CSV produced by a
# single run always share the same suffix, even if the clock rolls over to
# the next second between the two assignments.
run_timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
log_file = output_folder / f"conversion_log_{run_timestamp}.txt"
error_csv = output_folder / f"error_experiments_{run_timestamp}.csv"

def log_message(message, print_to_console=True):
    """Append a timestamped message to the log file and optionally print it.

    Args:
        message: Text to record. It is written to ``log_file`` prefixed with
            the current local time in ``[YYYY-MM-DD HH:MM:SS]`` form.
        print_to_console: When true, the message (without the timestamp) is
            also printed to stdout.
    """
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    # The encoding is explicit because this script logs non-ASCII symbols
    # (the check mark / cross in the success and error messages). With the
    # platform default encoding (e.g. cp1252 on Windows) those writes can
    # raise UnicodeEncodeError.
    with open(log_file, 'a', encoding='utf-8') as f:
        f.write(f"[{timestamp}] {message}\n")
    if print_to_console:
        print(message)

# Accumulates one dict per problematic ABF file; written out as a CSV at the end
error_records = []

# Announce the run and read the parameter spreadsheet
log_message("Starting NWB conversion process")
log_message(f"Loading Excel file: {excel_path}")

# Read the sheet and drop the first row after the header
df = pd.read_excel(excel_path).iloc[1:]

# Cleaning: discard rows that are entirely empty, then rows lacking either of
# the two mandatory columns, and renumber what is left from zero.
initial_row_count = len(df)
df = (
    df.dropna(how="all")
    .dropna(subset=["EXPERIMENT ID", ".abf file"], how="any")
    .reset_index(drop=True)
)

rows_removed = initial_row_count - len(df)
log_message(f"Removed {rows_removed} empty or incomplete rows. {len(df)} rows remaining.")

# Helper: substitute a default for missing spreadsheet values
def safe_get(value, default=""):
    """Return *value* unless pandas treats it as missing, else *default*.

    Args:
        value: Cell value to inspect.
        default: Replacement returned for missing values (empty string
            unless overridden).
    """
    if pd.isna(value):
        return default
    return value

# Helper: make sure a file name from the spreadsheet ends in .abf
def ensure_abf_extension(filename):
    """Return *filename* as a stripped string that ends with ``.abf``.

    Missing values are handed back untouched. Any other value is converted
    to ``str`` and stripped of surrounding whitespace. The suffix check is
    case-insensitive, so an existing ``.ABF`` ending is kept as written.
    """
    if pd.isna(filename):
        return filename
    cleaned = str(filename).strip()
    already_tagged = cleaned.lower().endswith('.abf')
    return cleaned if already_tagged else cleaned + '.abf'

# Normalise every entry of the file-name column so it carries the .abf suffix
df[".abf file"] = df[".abf file"].apply(ensure_abf_extension)

# Bucket the rows by EXPERIMENT ID; the loop below handles one bucket at a time
grouped = df.groupby("EXPERIMENT ID")
total_experiments = len(grouped)
log_message(f"Processing {total_experiments} experiments")

# Running tallies reported in the end-of-run summary
successful_conversions = failed_conversions = 0

# Convert each experiment (all spreadsheet rows sharing one EXPERIMENT ID) into
# a single NWB file. A failure in one experiment is logged and recorded, then
# the loop moves on to the next experiment.
for experiment_id, group in grouped:
    try:
        log_message(f"\n--- Processing experiment {experiment_id} ---")
        # Experiment-level fields (cell, slice, subject, ...) are taken from
        # the first row of the group.
        first_row = group.iloc[0]

        # Construct icephys_metadata with safe value extraction
        icephys_metadata = {
            "cell_id": safe_get(first_row["cell_id"]),
            "slice_id": safe_get(first_row["slice_id"]),
            "targeted_layer": safe_get(first_row["targeted_layer"]),
            # .get(): the "inferred_layer" column is optional in the sheet
            "inferred_layer": safe_get(first_row.get("inferred_layer", "")),
            "recording_sessions": [
                {
                    "abf_file_name": safe_get(row[".abf file"]),
                    "stimulus_type": safe_get(row["stimulus_type"]),
                    "icephys_experiment_type": safe_get(row["icephys_experiment_type"])
                }
                for _, row in group.iterrows()
                if pd.notna(row[".abf file"])  # Extra safety check
            ]
        }

        # Verify ABF files exist
        abf_file_paths = []
        missing_files = []
        for _, row in group.iterrows():
            if pd.notna(row[".abf file"]):
                file_path = ECEPHY_DATA_PATH / row[".abf file"]
                if not file_path.exists():
                    warning_msg = f"Warning: ABF file not found: {file_path}"
                    log_message(warning_msg)
                    missing_files.append(row[".abf file"])
                    continue
                abf_file_paths.append(file_path)

        if not abf_file_paths:
            error_msg = f"Skipping experiment {experiment_id}: no valid ABF files found"
            log_message(error_msg)
            # Add all ABF files from this experiment to error records
            for _, row in group.iterrows():
                error_records.append({
                    "EXPERIMENT ID": experiment_id,
                    ".abf file": row[".abf file"],
                    "error_type": "no_valid_files"
                })
            failed_conversions += 1
            continue

        # The experiment is converted from the files that were found, but
        # every file that was not found is still written to the error CSV.
        # Without this, a partially missing experiment would leave no trace
        # outside the log and the run could report "no errors".
        for missing_name in missing_files:
            error_records.append({
                "EXPERIMENT ID": experiment_id,
                ".abf file": missing_name,
                "error_type": "file_not_found"
            })

        log_message(f"Found {len(abf_file_paths)} valid ABF files for experiment {experiment_id}")

        # Instantiate data interface
        interface = AbfInterface(
            file_paths=abf_file_paths,
            icephys_metadata=icephys_metadata
        )

        # Retrieve and update metadata
        metadata = interface.get_metadata()
        metadata['NWBFile'].update(
            identifier=str(experiment_id),
            session_description=safe_get(first_row["session_description"]),
            lab="my lab name",
            institution="My University",
            experimenter=["John Doe", "Jane Doe"]
        )
        metadata["Subject"] = {
            "subject_id": safe_get(first_row["subject_id"]),
            "species": safe_get(first_row["species"]),
            "genotype": safe_get(first_row["genotype"]),
            "sex": safe_get(first_row["sex"]),
            "date_of_birth": str(safe_get(first_row["date_of_birth"]))
        }

        # Run conversion; the output file is named after the experiment ID
        nwb_output_path = output_folder / f"{experiment_id}.nwb"
        interface.run_conversion(nwbfile_path=nwb_output_path, metadata=metadata)

        success_msg = f"✓ Finished NWB conversion for experiment {experiment_id}"
        log_message(success_msg)
        successful_conversions += 1

    except Exception as e:
        # Deliberately broad: this is the per-experiment boundary, so one bad
        # experiment must not abort the whole batch.
        error_msg = f"✗ Error processing experiment {experiment_id}: {e}"
        log_message(error_msg)
        # Add all ABF files from this experiment to error records
        for _, row in group.iterrows():
            error_records.append({
                "EXPERIMENT ID": experiment_id,
                ".abf file": row[".abf file"],
                "error_type": f"conversion_error: {e}"
            })
        failed_conversions += 1

# Write summary to log
log_message("\n" + "="*50)
log_message("CONVERSION SUMMARY")
log_message("="*50)
log_message(f"Total experiments: {total_experiments}")
log_message(f"Successful conversions: {successful_conversions}")
log_message(f"Failed conversions: {failed_conversions}")
# If no rows survived cleaning there are zero experiments; report 0.0% in that
# case instead of crashing with ZeroDivisionError at the very end of the run.
success_rate = (successful_conversions / total_experiments) * 100 if total_experiments else 0.0
log_message(f"Success rate: {success_rate:.1f}%")

# Persist the collected error records, or state that there were none
if not error_records:
    log_message("\nNo errors encountered - all conversions successful!")
else:
    error_df = pd.DataFrame(error_records)
    error_df.to_csv(error_csv, index=False)
    log_message(f"\nError details saved to: {error_csv}")
    log_message(f"Total error records: {len(error_records)}")

# Closing banner: the log location is logged once, then a console-only
# pointer to the log is printed between two rules.
log_message(f"\nLog file saved to: {log_file}")
banner = "=" * 50
print(f"\n{banner}")
print(f"Conversion complete! Check {log_file} for details.")
print(banner)

  args_to_set['date_of_birth'] = _add_missing_timezone(date_of_birth)
  args_to_set['session_start_time'] = _add_missing_timezone(session_start_time)


✓ Finished NWB conversion for experiment 2025053001


  args_to_set['date_of_birth'] = _add_missing_timezone(date_of_birth)
  args_to_set['session_start_time'] = _add_missing_timezone(session_start_time)


✓ Finished NWB conversion for experiment 2025053101
