<a href="https://colab.research.google.com/github/yonabell/Report/blob/main/Reporting_code_with_function.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import os

In [None]:
from google.colab import files

# files.upload() opens Colab's browser upload widget and returns a dict keyed
# by the name of each uploaded file.
uploaded = files.upload()
if not uploaded:
    raise ValueError("No file was uploaded; upload the Excel report before continuing.")

# The following cells pass `file_path` to clean_data, so bind it to the
# uploaded file's name (the first one if several files were selected).
file_path = next(iter(uploaded))

In [None]:
def clean_data(file_path):
    """Load the Excel report and keep only the ID rows and their value columns.

    Steps, in order:
      1. Read the workbook with openpyxl, taking row index 4 as the header.
      2. Keep the rows whose 'Unnamed: 0' cell contains 'A' followed by nine
         digits and store that match in a new 'ID' column.
      3. Move 'ID' to the front and drop 'Unnamed: 0'.
      4. Drop the seven columns that directly follow 'ID'.
      5. From what is left, drop every seventh column starting at position 7,
         which leaves 'ID' followed by runs of six columns.

    Args:
        file_path: Path of the Excel file to read.

    Returns:
        A new DataFrame with 'ID' as its first column.
    """
    id_pattern = r'(A\d{9})'

    raw_df = pd.read_excel(file_path, engine='openpyxl', header=4)

    # Extract once; the same Series serves as the row filter and as the ID values.
    extracted_ids = raw_df['Unnamed: 0'].str.extract(id_pattern, expand=False)
    has_id = extracted_ids.notna()
    cleaned_df = raw_df.loc[has_id].copy()
    cleaned_df['ID'] = extracted_ids.loc[has_id]

    # Put 'ID' right after the first column, then remove 'Unnamed: 0'.
    names = list(cleaned_df.columns)
    id_pos = names.index('ID')
    names = [names[0], names[id_pos], *names[1:id_pos], *names[id_pos + 1:]]
    cleaned_df = cleaned_df[names].drop(columns='Unnamed: 0')

    # Remove the columns at positions 1 to 7 (the seven right after 'ID').
    cleaned_df = cleaned_df.drop(columns=cleaned_df.columns[1:8])

    # Remove every seventh remaining column, beginning with position 7.
    every_seventh = list(cleaned_df.columns)[7::7]
    return cleaned_df.drop(columns=every_seventh)

In [None]:
# Run clean_data on the workbook named by `file_path` and keep the cleaned
# frame for the cells below.
filtered_cpt_df = clean_data(file_path)

In [None]:
# Show the cleaned frame as this cell's output
filtered_cpt_df

In [None]:
def aggregate_data(filtered_df):
    """Sum each report measure per ID.

    A measure can be spread over several columns whose names start with the
    measure's name (for example 'Purch. Volume' and 'Purch. Volume.1'). Every
    such column is converted to numbers (unparseable cells count as missing
    and are skipped), and all of a measure's cells are added up for each ID.

    A column belongs to the *longest* measure name it starts with, so
    'Profit Margin I*' columns are not also counted under 'Profit Margin I'.

    NOTE(review): the two '% of Purchased Volume' measures are summed like the
    others; confirm that a plain sum is what the report needs.

    Args:
        filtered_df: DataFrame with an 'ID' column plus measure columns.
            It is not modified.

    Returns:
        A new DataFrame with one row per ID (sorted by ID) and the columns
        'ID' followed by the six measure names. A measure without any
        matching column is reported as 0.

    Raises:
        KeyError: If filtered_df has no 'ID' column.
    """
    # Columns to be aggregated (excluding 'ID')
    agg_cols = ['Purch. Volume', 'S-Revenues', 'Profit Margin I',
                'PM I % of Purchased Volume', 'Profit Margin I*',
                'PM I* % of Purchased Volume']

    # Try longer names first: 'Profit Margin I*...' also starts with
    # 'Profit Margin I', and must be claimed by the starred measure only.
    longest_first = sorted(agg_cols, key=len, reverse=True)
    columns_by_measure = {agg_col: [] for agg_col in agg_cols}
    for col in filtered_df.columns:
        if not isinstance(col, str):
            continue
        owner = next((name for name in longest_first if col.startswith(name)), None)
        if owner is not None:
            columns_by_measure[owner].append(col)

    # Build per-row totals in a fresh frame so the caller's data stays untouched.
    row_totals = pd.DataFrame({'ID': filtered_df['ID'].to_numpy()})
    for agg_col in agg_cols:
        matching = columns_by_measure[agg_col]
        if matching:
            numeric = filtered_df[matching].apply(pd.to_numeric, errors='coerce')
            row_totals[agg_col] = numeric.sum(axis=1).to_numpy()
        else:
            row_totals[agg_col] = 0.0

    # One grouped sum replaces the row-by-row concat of the result frame.
    return row_totals.groupby('ID', as_index=False).sum()


In [None]:
# Call the aggregate_data function on the cleaned frame
aggregated_df = aggregate_data(filtered_cpt_df)

In [None]:
# Show the per-ID totals as this cell's output
aggregated_df

In [None]:
def rename_columns(aggregated_df):
    """Return a copy of aggregated_df with the report headers shortened.

    'ID' becomes 'BPID' and the six measure headers become 'PurchVolume',
    'Revenue', 'PMI', 'PMIRate', 'PMIStar' and 'PMIStarRate'. Columns not
    listed in the mapping keep their names; the input frame is not modified.
    """
    return aggregated_df.rename(columns={
        'ID': 'BPID',
        'Purch. Volume': 'PurchVolume',
        'S-Revenues': 'Revenue',
        'Profit Margin I': 'PMI',
        'PM I % of Purchased Volume': 'PMIRate',
        'Profit Margin I*': 'PMIStar',
        'PM I* % of Purchased Volume': 'PMIStarRate',
    })

In [None]:
# Call rename_columns to replace the report headers with the short names
aggregated_df = rename_columns(aggregated_df)

In [None]:
# Show the renamed totals as this cell's output
aggregated_df

In [None]:
from google.colab import files

# Upload the workbook for the month-by-month pass. files.upload() returns a
# dict keyed by the name of each uploaded file.
uploaded = files.upload()
if not uploaded:
    raise ValueError("No file was uploaded; upload the Excel report before continuing.")

# Point `file_path` at this upload so the next clean_data call reads it
# (the first file if several were selected).
file_path = next(iter(uploaded))

In [None]:
# Run clean_data again on the workbook named by `file_path`; this replaces
# the earlier filtered_cpt_df.
filtered_cpt_df = clean_data(file_path)

In [None]:
# Map every month name to its run of six consecutive columns in
# filtered_cpt_df. Position 0 is skipped, so January takes positions 1-6,
# February 7-12, and so on. A month whose positions lie past the last column
# gets a shorter (possibly empty) list.
months = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']

month_columns = {
    month: filtered_cpt_df.columns[1 + 6 * i:7 + 6 * i].tolist()
    for i, month in enumerate(months)
}

In [None]:
# Months to keep; edit this list to change the selection.
filtered_months = ['February', 'March']  # Example: Change this list as needed

# 'ID' comes first, followed by the columns of each requested month in the
# order given. A name that is not a key of month_columns contributes nothing.
selected_columns = ['ID'] + [
    column
    for month in filtered_months
    for column in month_columns.get(month, [])
]

# Restrict the cleaned frame to the selection and show it as the cell output.
filtered_df = filtered_cpt_df[selected_columns]
filtered_df

In [None]:
# Call aggregate_data on the month-filtered frame. Passing the full
# filtered_cpt_df here would ignore the month selection made when building
# filtered_df and total every month instead.
aggregated_df = aggregate_data(filtered_df)

In [None]:
# Call rename_columns to replace the report headers with the short names
aggregated_df = rename_columns(aggregated_df)