## Year 2021

In [None]:
#Import library
import pandas as pd
import csv 

# --- 2021: build the pacemaker extract from the DEVICE and FOI_TEXT files ---

# Columns left out of the saved extract. Names that are not present in the
# merged frame are skipped silently (errors='ignore' below).
columns_to_remove = [
    'DEVICE_EVENT_KEY',
    'IMPLANT_FLAG',
    'DATE_REMOVED_FLAG',
    'MANUFACTURER_D_ZIP_CODE_EXT',
    'OTHER_ID_NUMBER',
    'DATE_RETURNED_TO_MANUFACTURER',
    'MANUFACTURER_D_ADDRESS_2',
    'DATE_REPORT',
    'PATIENT_SEQUENCE_NUMBER',
    'LOT_NUMBER',
]

# Both inputs are pipe-delimited and read as latin1. Malformed rows are
# skipped for the DEVICE file only; the FOI_TEXT read raises on them.
device_data = pd.read_csv("DEVICE2021.txt", sep="|", encoding="latin1", on_bad_lines='skip')
foitext_data = pd.read_csv("foitext2021.txt", sep="|", encoding="latin1")

# Case-insensitive match on GENERIC_NAME; rows with a missing name are
# treated as non-matches (na=False).
is_pacemaker = device_data['GENERIC_NAME'].str.contains('pacemaker', case=False, na=False)
pacemaker_devices = device_data.loc[is_pacemaker]

# Inner join on MDR_REPORT_KEY, then keep only the first merged row per key
# (drop_duplicates defaults to keep='first'), then drop the unwanted columns.
merged_data = (
    pacemaker_devices
    .merge(foitext_data, on='MDR_REPORT_KEY', how='inner')
    .drop_duplicates(subset='MDR_REPORT_KEY')
    .drop(columns=columns_to_remove, errors='ignore')
)

# Persist the extract without the row index.
merged_data.to_csv('merged_data_cleaned_2021.csv', index=False)

print("Dataset for 2021 saved successfully.")

## Year 2022

In [None]:
#Import library
import pandas as pd
import csv 

# --- 2022: build the pacemaker extract from the DEVICE and FOI_TEXT files ---

# Columns left out of the saved extract. Names that are not present in the
# merged frame are skipped silently (errors='ignore' below).
columns_to_remove = [
    'DEVICE_EVENT_KEY',
    'IMPLANT_FLAG',
    'DATE_REMOVED_FLAG',
    'MANUFACTURER_D_ZIP_CODE_EXT',
    'OTHER_ID_NUMBER',
    'DATE_RETURNED_TO_MANUFACTURER',
    'MANUFACTURER_D_ADDRESS_2',
    'DATE_REPORT',
    'PATIENT_SEQUENCE_NUMBER',
    'LOT_NUMBER',
]

# DEVICE file: pipe-delimited, latin1, malformed rows skipped.
device_data = pd.read_csv("DEVICE2022.txt", sep="|", encoding="latin1", on_bad_lines='skip')

# Quick look at the raw DEVICE rows.
print("DEVICE Data Sample:")
print(device_data.head())

# FOI_TEXT file: pipe-delimited, latin1; malformed rows raise here.
foitext_data = pd.read_csv("foitext2022.txt", sep="|", encoding="latin1")

# Quick look at the raw FOI_TEXT rows.
print("\nFOI_TEXT Data Sample:")
print(foitext_data.head())

# Case-insensitive match on GENERIC_NAME; rows with a missing name are
# treated as non-matches (na=False).
is_pacemaker = device_data['GENERIC_NAME'].str.contains('pacemaker', case=False, na=False)
pacemaker_devices = device_data.loc[is_pacemaker]

# Inner join on MDR_REPORT_KEY, then keep only the first merged row per key
# (drop_duplicates defaults to keep='first'), then drop the unwanted columns.
merged_data = (
    pacemaker_devices
    .merge(foitext_data, on='MDR_REPORT_KEY', how='inner')
    .drop_duplicates(subset='MDR_REPORT_KEY')
    .drop(columns=columns_to_remove, errors='ignore')
)

# Persist the extract without the row index.
merged_data.to_csv('merged_data_cleaned_2022.csv', index=False)

print("Dataset for 2022 saved successfully.")

## Year 2023

In [None]:
import pandas as pd
import csv 

# --- 2023: build the pacemaker extract from the DEVICE and FOI_TEXT files ---

# Both inputs are pipe-delimited and read as latin1.
device_data = pd.read_csv("DEVICE2023.txt", delimiter="|", encoding="latin1")
foitext_data = pd.read_csv("foitext2023.txt", delimiter="|", encoding="latin1")

# Quick look at the raw inputs.
print("DEVICE Data Sample:")
print(device_data.head())
print("\nFOI_TEXT Data Sample:")
print(foitext_data.head())

# Case-insensitive match on GENERIC_NAME; rows with a missing name are
# treated as non-matches (na=False).
pacemaker_devices = device_data[device_data['GENERIC_NAME'].str.contains('pacemaker', case=False, na=False)]

# Inner join on MDR_REPORT_KEY: only reports present in both files survive.
merged_data = pd.merge(pacemaker_devices, foitext_data, on='MDR_REPORT_KEY', how='inner')

# Keep only the first merged row per MDR_REPORT_KEY
# (drop_duplicates defaults to keep='first').
merged_data = merged_data.drop_duplicates(subset='MDR_REPORT_KEY')

# Display merged data sample after removing duplicates
print("\nMerged Data Sample (After Removing Duplicates):")
print(merged_data.head())

# Same drop list as the 2021 and 2022 cells, so the three yearly extracts share
# one column set when they are concatenated later. Names that are not present
# in the merged frame are skipped silently (errors='ignore' below).
columns_to_remove = [
    'DEVICE_EVENT_KEY',
    'IMPLANT_FLAG',
    'DATE_REMOVED_FLAG',
    'MANUFACTURER_D_ZIP_CODE_EXT',
    'OTHER_ID_NUMBER',
    'DATE_RETURNED_TO_MANUFACTURER',
    'MANUFACTURER_D_ADDRESS_2',
    'DATE_REPORT',
    'PATIENT_SEQUENCE_NUMBER',
    'LOT_NUMBER',
]

# Drop specified columns
merged_data = merged_data.drop(columns=columns_to_remove, errors='ignore')

# Persist the extract without the row index.
# NOTE(review): unlike the 2021/2022 cells, this file name carries no year
# suffix. It is kept as-is because other steps may read it by this name;
# consider renaming to 'merged_data_cleaned_2023.csv' once that is confirmed.
merged_data.to_csv('merged_data_cleaned.csv', index=False)

print("Dataset saved successfully.")

## Combine all three years

In [2]:
import pandas as pd

# Load the cleaned per-year datasets.
# NOTE(review): none of the yearly cells visible in this notebook writes a
# 'final_*.csv' file (they write 'merged_data_cleaned*.csv'); presumably these
# come from a separate cleaning step -- confirm before a Restart & Run All.
file_path_2021 = 'final_2021.csv'
file_path_2022 = 'final_2022.csv'
file_path_2023 = 'final_2023.csv'

cleaned_data_2021 = pd.read_csv(file_path_2021)
cleaned_data_2022 = pd.read_csv(file_path_2022)
cleaned_data_2023 = pd.read_csv(file_path_2023)

# Stack the three years row-wise. ignore_index=True gives the combined frame a
# fresh 0..n-1 index; without it each year's labels restart at 0 and repeat,
# which makes label-based lookups on combined_data ambiguous.
combined_data = pd.concat(
    [cleaned_data_2021, cleaned_data_2022, cleaned_data_2023],
    ignore_index=True,
)

# Save the combined dataset to a new CSV file (row index is not written, so the
# file contents do not depend on the index chosen above).
combined_file_path = 'combined_data_2021_2022_2023.csv'
combined_data.to_csv(combined_file_path, index=False)

print("Combined dataset saved successfully.")


Combined dataset saved successfully.
