In [None]:
"""
This script processes an Excel file (KBB_list.xlsx) to:
1. Identify duplicate 'kbb_serial_number' values that have different 'reference_number'.
2. Extract and filter only those records where the 'kbb_serial_number' has multiple 'reference_number'.
3. Save the filtered data into a new Excel file (KBB_list_dublicate_filtered.xlsx).
4. Display the results for quick verification.
"""

import pandas as pd  # Importing the pandas library

def find_conflicting_serials(df):
    """Return the rows whose 'kbb_serial_number' is linked to several 'reference_number' values.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing at least the 'kbb_serial_number' and
        'reference_number' columns.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` (original order and index preserved) whose serial
        number occurs with more than one distinct, non-missing reference
        number. Rows without a serial number are never returned.

    Raises
    ------
    KeyError
        If either required column is missing from ``df``.
    """
    # Count distinct references per serial and broadcast the count back onto
    # every row. This is vectorised, unlike groupby(...).filter(lambda ...),
    # which calls a Python function once per serial number.
    refs_per_serial = (
        df.groupby('kbb_serial_number')['reference_number'].transform('nunique')
    )

    # Rows with a missing serial belong to no group; their count is NaN and
    # the comparison below evaluates to False for them.
    return df[refs_per_serial > 1]


# The guard is also true inside a notebook kernel, so the cell behaves as
# before; it only keeps the file I/O from running when this code is imported.
if __name__ == "__main__":
    # Load the Excel file
    df = pd.read_excel("KBB_list.xlsx")

    # Keep only serial numbers that appear with different reference numbers
    duplicates = find_conflicting_serials(df)

    # Save the filtered data to a new Excel file
    # NOTE(review): 'dublicate' looks like a typo for 'duplicate'; left as is
    # because renaming the output file would change behaviour.
    duplicates.to_excel("KBB_list_dublicate_filtered.xlsx", index=False)

    # Display output
    print(duplicates)


In [None]:
"""
This script processes an Excel file (KBB_list.xlsx) to:
1. Remove rows where 'kbb_serial_number' is empty.
2. Identify duplicate 'kbb_serial_number' values that have different 'reference_number'.
3. Restructure the output so that each unique 'reference_number' appears in separate columns.
4. Save the transformed data to a new Excel file (KBB_list_dublicate_filtered.xlsx).
"""

import pandas as pd  # Importing the pandas library

def pivot_conflicting_references(df):
    """Build a wide table of the reference numbers attached to conflicting serial numbers.

    A serial number is "conflicting" when it occurs with more than one
    distinct 'reference_number'.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing at least the 'description', 'kbb_serial_number' and
        'reference_number' columns.

    Returns
    -------
    pandas.DataFrame
        One row per ('description', 'kbb_serial_number') pair, followed by
        'Reference 1', 'Reference 2', ... columns. The numbering runs per
        serial number in order of first appearance. When no serial number is
        conflicting, an empty frame with only the two key columns is returned.

    Raises
    ------
    KeyError
        If a required column is missing from ``df``.
    """
    key_cols = ['description', 'kbb_serial_number']

    # Remove rows where 'kbb_serial_number' is empty
    with_serial = df.dropna(subset=['kbb_serial_number'])

    # Keep only serial numbers that occur with different reference numbers
    refs_per_serial = (
        with_serial.groupby('kbb_serial_number')['reference_number'].transform('nunique')
    )
    conflicting = with_serial[refs_per_serial > 1]

    if conflicting.empty:
        # Nothing to pivot; return a predictable, empty result instead of
        # relying on how pivot_table treats an empty frame.
        return pd.DataFrame(columns=key_cols)

    numbered = (
        conflicting
        # Exact repeats of the same (description, serial, reference) row would
        # otherwise each get their own 'Reference N' column.
        .drop_duplicates(subset=key_cols + ['reference_number'])
        # Number the remaining rows 1..k within each serial. This is computed
        # on the filtered frame itself, so no index alignment is involved.
        .assign(
            reference_slot=lambda rows: rows.groupby('kbb_serial_number').cumcount() + 1
        )
    )

    # Each (description, serial, slot) cell holds exactly one row, so 'first'
    # simply picks that value.
    # NOTE(review): rows with a missing 'description' appear to be left out by
    # pivot_table's default NaN handling - confirm that this is acceptable.
    wide = numbered.pivot_table(
        index=key_cols,
        columns='reference_slot',
        values='reference_number',
        aggfunc='first',
    )

    # Rename the columns for clarity
    wide.columns = [f'Reference {slot}' for slot in wide.columns]

    # Turn the key columns back into ordinary columns
    return wide.reset_index()


# The guard is also true inside a notebook kernel, so the cell behaves as
# before; it only keeps the file I/O from running when this code is imported.
if __name__ == "__main__":
    # Load the Excel file
    df = pd.read_excel("KBB_list.xlsx")

    pivot_df = pivot_conflicting_references(df)

    # Save the transformed data to a new Excel file
    # NOTE(review): this is the same output path the previous cell writes to,
    # so running both cells leaves only this cell's result on disk.
    pivot_df.to_excel("KBB_list_dublicate_filtered.xlsx", index=False)

    # Display output
    print("Processed data saved to KBB_list_dublicate_filtered.xlsx")
