# Prepare a .csv file from EPIC export to import on secuTrial

created by: Yasaman Safarkhanlo on 2025.03.03

last modified: file name

In [1]:
import pandas as pd
import os
import numpy as np
from datetime import datetime
from pathlib import Path

import openpyxl
import json
import os
from typing import Dict, Any, Optional, Tuple

In [None]:
# Root folder that holds both export trees ("sT-files" and "EPIC-files")
base_dir = Path("/app/data")

# For each source, pick the most recently modified "export-*" entry
# (None when nothing matches the pattern)
latest_sT_export = max(
    (base_dir / "sT-files").glob("export-*"),
    key=lambda x: x.stat().st_mtime,
    default=None,
)
latest_EPIC_export = max(
    (base_dir / "EPIC-files").glob("export-*"),
    key=lambda x: x.stat().st_mtime,
    default=None,
)

# Stop right away when the secuTrial export is missing
if not latest_sT_export:
    raise FileNotFoundError("No valid secuTrial export directory found.")
secuTrial_base_dir = latest_sT_export
REVASC_base_dir = secuTrial_base_dir / "REVASC"

# Same check for the EPIC export
if not latest_EPIC_export:
    raise FileNotFoundError("No valid EPIC export directory found.")
epic_base_dir = latest_EPIC_export

## Read and prepare import-ready variables from the EPIC export

In [3]:
def prefix_map(file_type):
    """Look up the column prefix used when renaming columns of a file type.

    Unknown file types yield an empty string.
    """
    prefixes = dict(
        encounter="enct.",
        flowsheet="flow.",
        imaging="img.",
        lab="lab.",
        medication="med.",
        monitor="mon.",
    )
    try:
        return prefixes[file_type]
    except KeyError:
        return ""

In [4]:
def merge_excel_files(directory, merge_column):
    """
    Merges all EPIC files in a directory based on a specific column, in a defined order.

    Files whose name contains one of the known keywords are merged first, in
    keyword order, and their columns receive the keyword's prefix. All
    remaining supported files are merged afterwards without a prefix. Every
    file is merged exactly once, and files are visited in sorted order so the
    result does not depend on the order in which the filesystem lists them.

    Parameters:
        directory (str or Path): Directory containing files.
        merge_column (str): Column name to use for merging files.

    Returns:
        pd.DataFrame

    Raises:
        FileNotFoundError: If `directory` does not exist.
    """
    # Define merge order and column prefixes
    merge_order = {
        "encounters": "enct.",
        "flowsheet": "flow.",
        "imaging": "img.",
        "lab": "lab.",
        "medication": "med.",
        "monitor": "mon."
    }
    valid_suffixes = {".xlsx", ".xls", ".csv"}

    directory = Path(directory)
    if not directory.exists():
        raise FileNotFoundError(f"Directory {directory} does not exist.")

    merged_df = pd.DataFrame()
    # Files already merged; prevents a file that matches several keywords
    # (or matches a keyword only case-insensitively) from being merged twice
    seen = set()

    # Merge keyword files in the defined order
    for keyword, prefix in merge_order.items():
        for file in sorted(directory.glob(f"*{keyword}*")):
            if file in seen or file.suffix.lower() not in valid_suffixes:
                continue
            seen.add(file)
            merged_df = merge_single_file(file, merge_column, merged_df, prefix)

    # Merge every remaining supported file without a prefix
    for file in sorted(directory.glob("*")):
        if file in seen or file.suffix.lower() not in valid_suffixes:
            continue
        seen.add(file)
        merged_df = merge_single_file(file, merge_column, merged_df)

    return merged_df

def merge_single_file(file_path, merge_column, merged_df, prefix=""):
    """
    Merges a single file into the main DataFrame with optional prefixing of columns.

    Parameters:
        file_path (str or Path): Path to file.
        merge_column (str): Column name to merge on.
        merged_df (pd.DataFrame): The main DataFrame to merge into.
        prefix (str): Optional prefix to add to column names for this file.

    Returns:
        pd.DataFrame: Updated merged DataFrame.
    """
    # Determine file extension
    file_extension = file_path.suffix.lower()

    # Read file based on its extension
    try:
        if file_extension == ".xlsx":
            df = pd.read_excel(file_path, engine='openpyxl')
        elif file_extension == ".xls":
            df = pd.read_excel(file_path, engine='xlrd')
        elif file_extension == ".csv":
            df = pd.read_csv(file_path)
        else:
            raise ValueError(f"Unsupported file type: {file_extension}")
    except Exception as e:
        print(f"Error reading file {file_path}: {e}")
        return merged_df  # Return the existing DataFrame without merging

    # Print number of columns in the current file
    print(f"File: {file_path.name} | Columns: {len(df.columns)} , Rows: {df.shape[0]}")

    # Add prefix to columns except the merge column
    df.rename(columns={col: f"{prefix}{col}" for col in df.columns if col != merge_column}, inplace=True)

    # Merge the DataFrame into the main DataFrame
    return df if merged_df.empty else merged_df.merge(df, on=merge_column, how="outer")

In [5]:
# Directory containing EPIC export files
directory = epic_base_dir

# Merge all EPIC export files on the encounter identifier.
# Failures are reported and then re-raised: every later cell needs
# df_EPIC_all, so continuing without it would only surface as an unrelated
# NameError further down.
try:
    df_EPIC_all = merge_excel_files(directory, merge_column="PAT_ENC_CSN_ID")
except FileNotFoundError as e:
    print(f"Error: Directory not found - {e}")
    raise
except Exception as e:
    print(f"An error occurred: {e}")
    raise
else:
    print(f"Merged DataFrame shape: {df_EPIC_all.shape}")

File: encounters.csv | Columns: 16 , Rows: 1701
File: flowsheet.csv | Columns: 31 , Rows: 1701
File: imaging.csv | Columns: 15 , Rows: 1701
File: lab.csv | Columns: 14 , Rows: 1701
File: medication.csv | Columns: 23 , Rows: 1701
File: monitor.csv | Columns: 13 , Rows: 1701
Merged DataFrame shape: (1701, 107)


## Assign SSR-IDs from the log-file

In [6]:
# Load the identification log that links hospital case numbers to SSR identifiers
id_log = pd.read_excel(base_dir / 'EPIC2sT-pipeline/Identification_log_SSR_2024_ohne PW_26.03.25.xlsx')

# The real header sits in the first data row: keep it aside, drop that row,
# renumber the remaining rows and install the saved labels as column names
header_row = id_log.iloc[0]
id_log = id_log.iloc[1:].reset_index(drop=True)
id_log.columns = header_row

# Short, consistent names for the two key columns
id_log = id_log.rename(columns={'Fall-Nr.': 'FID', 'SSR Identification SSR-INS-000....': 'SSR'})

In [7]:
# Derive an integer 'FID' key from the imaging table's FID column.
# Missing values are replaced by 0 before the integer cast.
df_EPIC_all['FID'] = df_EPIC_all['img.FID'].fillna(0).astype(int)
df_EPIC_all.insert(0, 'FID', df_EPIC_all.pop('FID'))  # Move 'FID' to the first column

# Look up the SSR identifier of every FID in the identification log.
# The left join keeps every EPIC row, including rows without a match.
# NOTE(review): the merged EPIC frame is reported with 1701 rows before this
# step, while the import file built from it later is reported with 1704 rows,
# so id_log presumably contains repeated FID values - confirm that the
# additional rows are intended.
df_EPIC_all = df_EPIC_all.merge(id_log[['FID', 'SSR']], on='FID', how='left')
df_EPIC_all.insert(1, 'SSR', df_EPIC_all.pop('SSR'))  # Move 'SSR' to the second column

## Merge with SSR secuTrial export - existing variables

In [8]:
def read_and_modify_secuTrial_export(df):
    """Turn a raw, header-less secuTrial export sheet into a tidy DataFrame.

    The row labelled 7 is removed, the rows before position 6 are skipped,
    the row at position 6 supplies the column names and the remaining rows
    are returned with a fresh 0-based index. The input frame is not modified.
    """
    # Drop the row labelled 7 (row 8 in Excel), then skip the leading rows
    trimmed = df.drop(index=[7]).iloc[6:]

    # First remaining row carries the column names; everything after it is data
    header = trimmed.iloc[0]
    body = trimmed.iloc[1:].reset_index(drop=True)
    body.columns = header
    return body

def safe_read_file(file_path, custom_reader=None):
    """
    Read an Excel or CSV file without letting read errors propagate.

    Excel files are loaded without a header row; CSV files use the pandas
    default. Any failure is reported on stdout and results in None.

    Parameters:
        file_path (str or Path): File to read (.xlsx, .xls or .csv).
        custom_reader (function, optional): Post-processing callable applied
            to the freshly loaded DataFrame; its result is returned instead.

    Returns:
        pd.DataFrame or None: The loaded (and optionally post-processed) data,
        or None when the file could not be read.
    """
    file_path = Path(file_path)
    suffix = file_path.suffix.lower()

    try:
        # Choose the reader from the file extension
        if suffix == ".csv":
            frame = pd.read_csv(file_path)
        elif suffix in (".xlsx", ".xls"):
            engine = 'xlrd' if suffix == ".xls" else 'openpyxl'
            frame = pd.read_excel(file_path, engine=engine, header=None)
        else:
            raise ValueError(f"Unsupported file type: {suffix}")

        # Hand the raw frame to the custom reader when one was supplied
        if custom_reader:
            return custom_reader(frame)
        return frame

    except FileNotFoundError:
        print(f"Error: File not found at {file_path}")
    except Exception as e:
        print(f"Error reading file at {file_path}: {e}")

    return None

In [9]:
# Root folder of the project data on this machine
base_dir = Path("/Users/yaskhanloo/Developer/bern-storke-center")

# For each source, pick the most recently modified "export-*" entry
# (None when nothing matches the pattern)
latest_sT_export = max(
    (base_dir / "sT-files").glob("export-*"),
    key=lambda x: x.stat().st_mtime,
    default=None,
)
latest_EPIC_export = max(
    (base_dir / "EPIC-files").glob("export-*"),
    key=lambda x: x.stat().st_mtime,
    default=None,
)

# Stop right away when one of the exports is missing
if not latest_sT_export:
    raise FileNotFoundError("No valid secuTrial export directory found.")
secuTrial_base_dir = latest_sT_export
REVASC_base_dir = secuTrial_base_dir / "REVASC"

if not latest_EPIC_export:
    raise FileNotFoundError("No valid EPIC export directory found.")
epic_base_dir = latest_EPIC_export

# Files to load from the selected exports
file_path_secuTrial = secuTrial_base_dir / 'SSR_cases_of_2024.xlsx'
file_path_REVASC = REVASC_base_dir / 'report_SSR01_20250218-105747.xlsx'

#file_path_EPIC = epic_base_dir / 'encounters.xlsx'
file_path_EPIC = epic_base_dir / 'encounters.csv'

# Both secuTrial workbooks go through the same header clean-up;
# the EPIC encounters file is read as-is
df_secuTrial = safe_read_file(file_path_secuTrial, custom_reader=read_and_modify_secuTrial_export)
df_REVASC = safe_read_file(file_path_REVASC, custom_reader=read_and_modify_secuTrial_export)

df_EPIC = safe_read_file(file_path_EPIC)

# Report the frame sizes, or the failure when any of the reads returned None
if df_secuTrial is None or df_EPIC is None or df_REVASC is None:
    print("Failed to load one or both dataframes.")
else:
    print(f'df_secuTrial size: {df_secuTrial.shape}, df_REVASC: {df_REVASC.shape}, df_EPIC size: {df_EPIC.shape}')

  warn("Workbook contains no default style, apply openpyxl's default")
  warn("Workbook contains no default style, apply openpyxl's default")


df_secuTrial size: (1795, 174), df_REVASC: (4979, 256), df_EPIC size: (1701, 16)


In [10]:
def _is_unnamed_label(col):
    """Return True for a column label that is not a string, is empty, or starts with 'Unnamed'."""
    if not isinstance(col, str):
        return True
    return not col or col.startswith('Unnamed')


# Report column labels of the secuTrial export that are not usable names
unnamed_columns_secuTrial = [col for col in df_secuTrial.columns if _is_unnamed_label(col)]
if not unnamed_columns_secuTrial:
    print('No unnamed columns found in df_secuTrial.')
else:
    print(f'Unnamed columns in df_secuTrial: {unnamed_columns_secuTrial}')

# Same report for the REVASC export
unnamed_columns_REVASC = [col for col in df_REVASC.columns if _is_unnamed_label(col)]
if not unnamed_columns_REVASC:
    print('No unnamed columns found in df_REVASC.')
else:
    print(f'Unnamed columns in df_REVASC: {unnamed_columns_REVASC}')

# Attach the REVASC columns to every secuTrial case (left join on the case id).
# Column names present in both frames get the '.revas' suffix on the REVASC side.
df_secuTrial_w_REVAS = pd.merge(
    df_secuTrial,
    df_REVASC,
    how='left',
    left_on='Case ID',
    right_on='CaseID',
    suffixes=('', '.revas'),
)

# The REVASC-side key is redundant after the join; also renumber the rows
df_secuTrial_w_REVAS = (
    df_secuTrial_w_REVAS
    .drop(columns=['CaseID'], errors='ignore')
    .reset_index(drop=True)
)

print(f'df_secuTrial_w_REVAS size: {df_secuTrial_w_REVAS.shape}')

Unnamed columns in df_secuTrial: [nan]
Unnamed columns in df_REVASC: [nan]
df_secuTrial_w_REVAS size: (1795, 429)


In [11]:
# Derive the numeric SSR key from the trailing digits of 'Case ID' and place it
# in the second column.
# NOTE(review): the integer cast fails if any 'Case ID' does not end in digits.
df_secuTrial_w_REVAS['SSR'] = df_secuTrial_w_REVAS['Case ID'].str.extract(r'(\d+)$').astype(int)
df_secuTrial_w_REVAS.insert(1, 'SSR', df_secuTrial_w_REVAS.pop('SSR'))
# Drop the column labelled with the string 'nan'.
# NOTE(review): presumably this is the NaN-labelled secuTrial column reported
# by the unnamed-column check, whose label was turned into a string when the
# empty merge suffix was applied - confirm; the call raises KeyError if no such
# column exists.
df_secuTrial_w_REVAS = df_secuTrial_w_REVAS.drop(columns=['nan'])

# Look up the hospital case number (FID) of every SSR key in the identification
# log; the left join keeps every secuTrial row
df_secuTrial_w_REVAS = df_secuTrial_w_REVAS.merge(id_log[['SSR', 'FID']], on='SSR', how='left')
df_secuTrial_w_REVAS.insert(0, 'FID', df_secuTrial_w_REVAS.pop('FID'))  # Move 'FID' to the first column

In [12]:
# Cases present in both sources are identified by the (FID, SSR) key pair
key_cols = ['FID', 'SSR']

# Key pairs that occur in the secuTrial export as well as in the EPIC data
common_values = pd.merge(
    df_secuTrial_w_REVAS[key_cols],
    df_EPIC_all[key_cols],
    how='inner',
    on=key_cols,
)

# Restrict each source to the shared key pairs
df_sT_common = pd.merge(df_secuTrial_w_REVAS, common_values, how='inner', on=key_cols)
df_ep_common = pd.merge(df_EPIC_all, common_values, how='inner', on=key_cols)

In [13]:
# secuTrial rows whose (FID, SSR) pair has no counterpart in the EPIC data:
# left join with an indicator column, keep the unmatched rows, drop the indicator
_sT_vs_epic = df_secuTrial_w_REVAS.merge(
    df_EPIC_all[['FID', 'SSR']],
    on=['FID', 'SSR'],
    how='left',
    indicator=True,
)
df_sT_only = _sT_vs_epic.query('_merge == "left_only"').drop(columns=['_merge'])

# EPIC rows whose (FID, SSR) pair has no counterpart in the secuTrial export
_epic_vs_sT = df_EPIC_all.merge(
    df_secuTrial_w_REVAS[['FID', 'SSR']],
    on=['FID', 'SSR'],
    how='left',
    indicator=True,
)
df_ep_only = _epic_vs_sT.query('_merge == "left_only"').drop(columns=['_merge'])

In [14]:
# Show how many rows (and columns) are unique to each source
df_sT_only.shape, df_ep_only.shape

((360, 430), (269, 109))

In [15]:
# apply the mapping to the sT data

In [16]:
# fill in/replace the entered values from sT to EPIC

## Prepare the .csv file using dictionaries

In [17]:
# Code-to-label tables shared by several columns
# (False/True hash equal to 0/1, so two entries cover both spellings)
yes_no_mapping = {0: 'no', 1: 'yes'}
bilateral_mapping = {0: 'no', 1: '', 2: 'right', 3: 'left', 4: 'bilateral'}
prosthetic_valves_mapping = {0: 'None', 1: 'Biological', 2: 'Mechanical'}
image_type_mapping = {1: 'CT', 2: 'MRI', 3: 'CT (external)', 4: 'MRI (external)'}
transport_map = {1: 'Ambulance', 2: 'Helicopter', 3: 'Other (taxi,self,relatives,friends...)'}
discharge_dest_map = {
    1: 'Home',
    3: 'Rehabilitation Hospital',
    2: 'Other acute care hospital',
    4: 'Nursing home, palliative care center, or other medical facility',
}

# Columns that use the yes/no table, grouped by source table
yes_no_columns = [
    # flowsheet
    'flow.iat_stentintracran', 'flow.iat_stentextracran',
    'flow.stroke_pre', 'flow.tia_pre', 'flow.ich_pre',
    'flow.hypertension', 'flow.diabetes', 'flow.hyperlipidemia',
    'flow.smoking', 'flow.atrialfib', 'flow.chd',
    'flow.lowoutput', 'flow.pad', 'flow.decompression',
    # imaging
    'img.iat_mech', 'img.follow_mra', 'img.follow_cta',
    'img.follow_ultrasound', 'img.follow_dsa', 'img.follow_tte',
    'img.follow_tee', 'img.follow_holter',
    # medication
    'med.aspirin_pre', 'med.clopidogrel_pre', 'med.prasugrel_pre',
    'med.ticagrelor_pre', 'med.dipyridamole_pre',
    'med.vka_pre', 'med.rivaroxaban_pre', 'med.dabigatran_pre',
    'med.apixaban_pre', 'med.edoxaban_pre',
    'med.parenteralanticg_pre', 'med.antihypertensive_pre',
    'med.antilipid_pre', 'med.hormone_pre',
    'med.treat_antiplatelet', 'med.treat_anticoagulant', 'med.treat_ivt',
]

# Columns that use the side/bilateral table
bilateral_columns = ['flow.mca', 'flow.aca', 'flow.pca', 'flow.vertebrobasilar']

# Per-column tables: column-specific entries first, then the shared tables
# fanned out over their column lists (every column references the same table object)
value_mappings = {
    'enct.non_swiss': {True: 'yes'},
    'enct.sex': {1: 'Male', 2: 'Female'},
    'enct.transport': transport_map,
    'enct.discharge_destinat': discharge_dest_map,  # Double-check mapping values
    'flow.firstangio_result': {2: 'no', 3: 'yes'},  # Double-check mapping values
    'flow.prostheticvalves': prosthetic_valves_mapping,
    'img.firstimage_type': image_type_mapping,
    **dict.fromkeys(yes_no_columns, yes_no_mapping),
    **dict.fromkeys(bilateral_columns, bilateral_mapping),
}

In [18]:
def create_import_file(df_EPIC, dictionary, start_id=13744):
    """Creates a DataFrame for import into secuTrial using values mapped from EPIC data.

    Parameters:
        df_EPIC (pd.DataFrame): Merged EPIC data whose columns carry the
            table prefixes ("enct.", "flow.", "img.", "lab.", "med.", "mon.").
        dictionary (str, Path or pd.DataFrame): Mapping table, or path to the
            Excel workbook holding it. It must provide the columns
            'EPIC_table', 'EPIC_field', 'secuTrial_import_table' and
            'secuTrial_import_field'.
        start_id (int): Number used for the first generated 'SSR-INS-<n>'
            case id; subsequent rows count upwards from it.

    Returns:
        pd.DataFrame: One row per row of `df_EPIC`, with the fixed columns
        'case_id', 'visit_name', 'center_id', 'entry_date' followed by one
        '<secuTrial table>.<secuTrial field>' column per usable mapping row.
        Mapped EPIC columns that do not exist are filled with ''.
    """
    # Accept an already-loaded mapping table as well as a path to the workbook
    if isinstance(dictionary, pd.DataFrame):
        mapping_df = dictionary
    else:
        mapping_df = pd.read_excel(dictionary)

    # EPIC table name fragment -> column prefix; the first matching fragment wins.
    # "Monitor" mirrors the "mon." prefix given to the monitor export's columns.
    table_prefixes = (
        ("Encounters", "enct."),
        ("Flowsheet", "flow."),
        ("Imaging", "img."),
        ("Lab", "lab."),
        ("Medications", "med."),
        ("Monitor", "mon."),
    )
    datetime_format = '%d.%m.%Y %H:%M:%S'

    # Work on a positionally indexed copy so that column assignment below lines
    # up row by row with df_import, whatever index the caller's frame carries
    df_EPIC = df_EPIC.reset_index(drop=True)
    n_rows = len(df_EPIC)
    today_date = datetime.today().date()

    df_import = pd.DataFrame({
        'case_id': [f'SSR-INS-{i}' for i in range(start_id, start_id + n_rows)],
        'visit_name': ["Acute Phase"] * n_rows,
        'center_id': ["Bern Inselspital (SSR)"] * n_rows,
        'entry_date': [today_date] * n_rows
    })

    for _, row in mapping_df.iterrows():
        column_source = row['EPIC_table']
        if pd.isna(column_source):
            continue  # mapping row without an EPIC source table

        column_source = str(column_source)
        prefix = next((p for name, p in table_prefixes if name in column_source), "")

        epic_column = f"{prefix}{row['EPIC_field']}"
        secuTrial_column = f"{row['secuTrial_import_table']}.{row['secuTrial_import_field']}"

        if epic_column in df_EPIC.columns:
            df_import[secuTrial_column] = df_EPIC[epic_column]
        else:
            df_import[secuTrial_column] = ''

    # Merge _date and _time columns into a single datetime column where both exist.
    # Iterate over a snapshot because the _time columns are dropped on the way.
    for date_col in list(df_import.columns):
        if not date_col.endswith('_date'):
            continue

        # Swap only the trailing suffix, not every '_date' inside the name
        time_col = date_col[:-len('_date')] + '_time'
        if time_col in df_import.columns:
            # NOTE(review): a row with a date but no time does not parse here
            # and ends up empty - confirm this is the wanted behaviour
            combined_datetime = df_import[date_col].astype(str) + ' ' + df_import[time_col].astype(str)
            df_import[date_col] = pd.to_datetime(combined_datetime, errors='coerce').dt.strftime(datetime_format)

            # Drop original _time column after merging
            df_import.drop(columns=[time_col], inplace=True)
        else:
            # Format _date column alone if no corresponding _time column
            df_import[date_col] = pd.to_datetime(df_import[date_col], errors='coerce').dt.strftime(datetime_format)

    # Round height and weight up to whole numbers (nullable integers).
    # Non-numeric content, such as the '' placeholder of an unmapped column,
    # becomes missing instead of raising.
    for col in ['Acute.height', 'Acute.weight']:
        if col in df_import.columns:
            numeric_values = pd.to_numeric(df_import[col], errors='coerce')
            df_import[col] = np.ceil(numeric_values).astype('Int64')

    # Remove Acute.zip values if Acute.non_swiss is 1
    if 'Acute.non_swiss' in df_import.columns and 'Acute.zip' in df_import.columns:
        df_import.loc[df_import['Acute.non_swiss'] == 1, 'Acute.zip'] = ''

    # Columns holding only the codes 0-4 become nullable integers; other float
    # columns are rounded to one decimal (integer columns need no rounding)
    for column in df_import.columns:
        if df_import[column].dropna().isin([0, 1, 2, 3, 4]).all():
            df_import[column] = df_import[column].astype('Int64')
        elif pd.api.types.is_float_dtype(df_import[column]):
            df_import[column] = df_import[column].round(1)

    return df_import

In [19]:
# Mapping workbook (EPIC field -> secuTrial import field) and target file of the import table
mapping_workbook = base_dir / 'sT-import-validation/map_epic2secuTrial_import.xlsx'
import_csv_path = base_dir / 'EPIC2sT-pipeline/SSR-INS-2024_import.csv'

# Build the import table from the merged EPIC data
import_file_df = create_import_file(df_EPIC_all, mapping_workbook, start_id=13744)

# Write it as a semicolon-separated CSV without the index column
import_file_df.to_csv(import_csv_path, sep=';', index=False)
print('file shape:', import_file_df.shape)


file shape: (1704, 86)


  df_import[date_col] = pd.to_datetime(df_import[date_col], errors='coerce').dt.strftime('%d.%m.%Y %H:%M:%S')
  df_import[date_col] = pd.to_datetime(df_import[date_col], errors='coerce').dt.strftime('%d.%m.%Y %H:%M:%S')
  df_import[date_col] = pd.to_datetime(df_import[date_col], errors='coerce').dt.strftime('%d.%m.%Y %H:%M:%S')


In [20]:
# Preview the first 20 rows of the import table.
# NOTE(review): the rendered preview includes name and birth-date columns, so
# identifying patient data ends up in the saved cell output - clear the output
# before sharing this file.
import_file_df.head(20)

Unnamed: 0,case_id,visit_name,center_id,entry_date,Acute.name_last,Acute.name_first,Acute.birth_date,Acute.zip,Acute.non_swiss,Acute.arrival_date,...,Hosp.decompression_date,Hosp.discharge_destinat,Hosp.discharge_date,REVASC.statins_dosage,REVASC.antihyper_dosage,REVASC.level_doac,REVASC.admis_platelets,REVASC.admis_haem,REVASC.admis_lecuco,REVASC.admis_crp
0,SSR-INS-13744,Acute Phase,Bern Inselspital (SSR),04.04.2025 00:00:00,Stucki,Thomas,28.04.1936 00:00:00,3112,0,02.03.2024 00:00:00,...,,3.0,04.03.2024 00:00:00,ATORVASTATIN FILMTABL 40 MG,TRIATEC TABL 2.5 MG / NEBILET PER ORAL,,153.0,139.0,5.4,3
1,SSR-INS-13745,Acute Phase,Bern Inselspital (SSR),04.04.2025 00:00:00,Krähenbühl,Kurt,05.10.1940 00:00:00,3018,0,02.03.2024 00:00:00,...,,,15.03.2024 00:00:00,,TORASEMID TABL 5 MG / AMLODIPIN TABL 5 MG,235.0,159.0,137.0,5.8,2
2,SSR-INS-13746,Acute Phase,Bern Inselspital (SSR),04.04.2025 00:00:00,Rothen,Hans,07.04.1947 00:00:00,3503,0,02.03.2024 00:00:00,...,,4.0,02.03.2024 00:00:00,ATORVASTATIN LACTAB 20 MG,AMLODIPIN TABL 5 MG,,287.0,145.0,6.0,<1
3,SSR-INS-13747,Acute Phase,Bern Inselspital (SSR),04.04.2025 00:00:00,Ak,Mehmet,01.04.1958 00:00:00,3422,0,03.03.2024 00:00:00,...,,4.0,05.03.2024 00:00:00,EZETIMIB AXAPHARM TABL 10 MG,,,145.0,137.0,5.8,11
4,SSR-INS-13748,Acute Phase,Bern Inselspital (SSR),04.04.2025 00:00:00,Marti,Fritz,09.10.1929 00:00:00,3073,0,03.03.2024 00:00:00,...,,4.0,11.03.2024 00:00:00,,OLMESARTAN-AMLODIPIN MEPHA FILMTABL 40/5MG / T...,,272.0,90.0,6.6,11
5,SSR-INS-13749,Acute Phase,Bern Inselspital (SSR),04.04.2025 00:00:00,Gobeli,Walter,06.01.1943 00:00:00,3772,0,04.03.2024 00:00:00,...,,4.0,14.03.2024 00:00:00,EZETIMIB PER ORAL / ROSUVASTATIN AXAPHARM FILM...,TORASEMID SANDOZ ECO TABL 10 MG / NEBIVOLOL TA...,0.49,179.0,67.0,6.7,3
6,SSR-INS-13750,Acute Phase,Bern Inselspital (SSR),04.04.2025 00:00:00,Keller,Kurt,22.07.1958 00:00:00,3250,0,05.03.2024 00:00:00,...,,4.0,08.03.2024 00:00:00,,,,121.0,148.0,4.5,
7,SSR-INS-13751,Acute Phase,Bern Inselspital (SSR),04.04.2025 00:00:00,Sahli,Urs,04.10.1939 00:00:00,3113,0,05.03.2024 00:00:00,...,,4.0,22.03.2024 00:00:00,,,,180.0,158.0,7.0,1
8,SSR-INS-13752,Acute Phase,Bern Inselspital (SSR),04.04.2025 00:00:00,Wirz,Susanne,30.10.1942 00:00:00,2525,0,05.03.2024 00:00:00,...,,4.0,06.03.2024 00:00:00,,AMLODIPIN VALSARTAN HCT MEPHA 5/160/25 / AMLOD...,,149.0,111.0,10.7,7
9,SSR-INS-13753,Acute Phase,Bern Inselspital (SSR),04.04.2025 00:00:00,Rüdisühli,Renate,07.08.1941 00:00:00,3006,0,05.03.2024 00:00:00,...,,3.0,07.03.2024 00:00:00,,IRBESARTAN FILMTABL 150 MG,,334.0,130.0,8.3,5


In [21]:
# Keep every column whose name does not begin with "REVASC"
is_revasc_column = import_file_df.columns.str.startswith('REVASC')
import_file_df_filtered = import_file_df.loc[:, ~is_revasc_column]

# Write the reduced table to the working directory (default comma separator, no index column)
output_path = 'import_file_output.csv'
import_file_df_filtered.to_csv(output_path, index=False)

# Confirm where the file went and how large the table is
print('File saved to:', output_path)
print('File shape:', import_file_df_filtered.shape)

File saved to: import_file_output.csv
File shape: (1704, 79)


# Convert the notebook to a .py script

In [None]:
#!jupyter nbconvert --to python your_notebook.ipynb --output your_script_name