In [None]:
import pandas as pd
import numpy as np

In [None]:
# Creating a sample time series data for demonstration purposes.
# A seeded generator is used so that Restart Kernel -> Run All reproduces
# exactly the same table (and therefore the same downstream numbers).
rng = np.random.default_rng(42)
date_rng = pd.date_range(start='2015-01-01', end='2022-12-31', freq='D', name='date')
# One integer load column per region, values drawn uniformly from [0, 100).
# The frame is built directly on the named date index instead of creating a
# 'date' column and moving it into the index in place afterwards.
df_sample = pd.DataFrame(
    {
        'load_region_1': rng.integers(0, 100, size=len(date_rng)),
        'load_region_2': rng.integers(0, 100, size=len(date_rng)),
        'load_region_3': rng.integers(0, 100, size=len(date_rng)),
    },
    index=date_rng,
)
df_sample.head()

In [None]:
def process_data_for_region(df, region_column):
    """Build a year-by-month table of monthly load totals with their change.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``DatetimeIndex`` (required by ``resample``).
    region_column : str
        Name of the numeric column of ``df`` to aggregate.

    Returns
    -------
    pandas.DataFrame
        Index ``Year``, columns ``Month`` (month numbers). Every cell is a
        string ``'<monthly total> (<pct>%)'`` where ``<pct>`` is the change
        versus the same month in the previous *remaining* row, rounded to
        two decimals. The first row has no predecessor and shows ``nan``.
    """
    # Monthly totals. An offset object is passed instead of the 'M' string
    # alias, which newer pandas releases deprecate in favour of 'ME'.
    monthly_data = df[region_column].resample(pd.offsets.MonthEnd()).sum()
    # A Series has no pivot_table(); spread it into explicit Year/Month/Load
    # columns so that it can be pivoted as a DataFrame.
    monthly_df = pd.DataFrame({
        'Year': monthly_data.index.year,
        'Month': monthly_data.index.month,
        'Load': monthly_data.values,
    })
    pivot_data = monthly_df.pivot(index='Year', columns='Month', values='Load')
    # Removing the year 2020. This happens before the percentage change is
    # taken, so the 2021 row is compared with the 2019 row.
    pivot_data = pivot_data.drop(2020, errors='ignore')
    # Row-over-row change in percent. fill_method=None keeps missing months
    # as NaN instead of silently comparing against a forward-filled value.
    pct_change_data = pivot_data.pct_change(fill_method=None) * 100
    # Combine total and change into one display string per cell.
    return pivot_data.astype(str) + ' (' + pct_change_data.round(2).astype(str) + '%)'

# Build one year-by-month table per load region, in region order
region_1_table, region_2_table, region_3_table = [
    process_data_for_region(df_sample, column_name)
    for column_name in ('load_region_1', 'load_region_2', 'load_region_3')
]

region_1_table

AttributeError: 'Series' object has no attribute 'pivot_table'

In [None]:
def process_data_for_region(df, region_column):
    """Build a year-by-month table of monthly load totals with their change.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``DatetimeIndex`` (required by ``resample``).
    region_column : str
        Name of the numeric column of ``df`` to aggregate.

    Returns
    -------
    pandas.DataFrame
        Index ``Year``, columns ``Month`` (month numbers). Every cell is a
        string ``'<monthly total> (<pct>%)'`` where ``<pct>`` is the change
        versus the same month in the previous *remaining* row, rounded to
        two decimals. The first row has no predecessor and shows ``nan``.
    """
    # Monthly totals. An offset object is passed instead of the 'M' string
    # alias, which newer pandas releases deprecate in favour of 'ME'.
    monthly_data = df[region_column].resample(pd.offsets.MonthEnd()).sum()
    # Long format (one row per month) so the totals can be pivoted.
    monthly_df = pd.DataFrame({
        'Year': monthly_data.index.year,
        'Month': monthly_data.index.month,
        'Load': monthly_data.values,
    })
    # Years as index and month numbers as columns.
    pivot_data = monthly_df.pivot(index='Year', columns='Month', values='Load')
    # Removing the year 2020. This happens before the percentage change is
    # taken, so the 2021 row is compared with the 2019 row.
    pivot_data = pivot_data.drop(2020, errors='ignore')
    # Row-over-row change in percent. fill_method=None keeps missing months
    # as NaN instead of silently comparing against a forward-filled value.
    pct_change_data = pivot_data.pct_change(fill_method=None) * 100
    # Combine total and change into one display string per cell.
    return pivot_data.astype(str) + ' (' + pct_change_data.round(2).astype(str) + '%)'

# Build one year-by-month table per load region, in region order
region_1_table, region_2_table, region_3_table = [
    process_data_for_region(df_sample, column_name)
    for column_name in ('load_region_1', 'load_region_2', 'load_region_3')
]

region_1_table

In [None]:
# Adjusting the function to use month numbers instead of names
def process_data_for_region_v2(df, region_column):
    """Build a year-by-month table of monthly load totals with their change.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``DatetimeIndex`` (required by ``resample``).
    region_column : str
        Name of the numeric column of ``df`` to aggregate.

    Returns
    -------
    pandas.DataFrame
        Index ``Year``, columns ``Month`` (month numbers). Every cell is a
        string ``'<monthly total> (<pct>%)'`` where ``<pct>`` is the change
        versus the same month in the previous *remaining* row, rounded to
        two decimals. The first row has no predecessor and shows ``nan``.
    """
    # Monthly totals. An offset object is passed instead of the 'M' string
    # alias, which newer pandas releases deprecate in favour of 'ME'.
    monthly_data = df[region_column].resample(pd.offsets.MonthEnd()).sum()
    # Long format (one row per month) so the totals can be pivoted.
    monthly_df = pd.DataFrame({
        'Year': monthly_data.index.year,
        'Month': monthly_data.index.month,
        'Load': monthly_data.values,
    })
    # Years as index and month numbers as columns.
    pivot_data = monthly_df.pivot(index='Year', columns='Month', values='Load')
    # Removing the year 2020. This happens before the percentage change is
    # taken, so the 2021 row is compared with the 2019 row.
    pivot_data = pivot_data.drop(2020, errors='ignore')
    # Row-over-row change in percent. fill_method=None keeps missing months
    # as NaN instead of silently comparing against a forward-filled value.
    pct_change_data = pivot_data.pct_change(fill_method=None) * 100
    # Combine total and change into one display string per cell.
    return pivot_data.astype(str) + ' (' + pct_change_data.round(2).astype(str) + '%)'

# Build one table per load region with the updated function, in region order
region_1_table_v2, region_2_table_v2, region_3_table_v2 = [
    process_data_for_region_v2(df_sample, column_name)
    for column_name in ('load_region_1', 'load_region_2', 'load_region_3')
]

region_1_table_v2

In [None]:
def add_aggregated_columns(df):
    """Append ``FG``, ``JV`` and ``Calendar`` aggregate columns to a month table.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose integer-labelled month columns hold strings of the form
        ``'<value> (<pct>%)'``. Columns 1, 2, 7 and 8 must be present.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with three extra string columns in the same
        ``'<value> (<pct>%)'`` format:

        * ``FG``       - column 1 + column 2
        * ``JV``       - column 7 + column 8
        * ``Calendar`` - sum of all month columns (1-12) present

        ``<pct>`` is the change versus the previous row, in percent, rounded
        to two decimals (``nan`` for the first row). The input frame is not
        modified.
    """
    # Only the month columns contribute to the aggregates; selecting them
    # explicitly keeps FG/JV (or any other extra column) out of the yearly
    # total, which would otherwise count months 1, 2, 7 and 8 twice.
    month_cols = [month for month in range(1, 13) if month in df.columns]
    # The numeric value is the text before the first space in each cell.
    df_values = df[month_cols].apply(
        lambda col: col.map(lambda cell: float(cell.split(' ')[0]))
    )

    aggregated = pd.DataFrame({
        'FG': df_values[1] + df_values[2],
        'JV': df_values[7] + df_values[8],
        'Calendar': df_values.sum(axis=1),
    })
    # Change versus the previous row, as a fraction. The earlier formula
    # divided FG by (FG - col1 - col2), which is always zero.
    df_pct_changes = aggregated.pct_change(fill_method=None)

    # Work on a copy so the caller's table keeps its original columns.
    result = df.copy()
    for col in ['FG', 'JV', 'Calendar']:
        result[col] = (
            aggregated[col].astype(str)
            + ' (' + (df_pct_changes[col] * 100).round(2).astype(str) + '%)'
        )
    return result

# Adding aggregated columns to each region table.
# Copies are passed so the *_v2 tables are guaranteed to stay untouched,
# whether or not the helper writes onto the frame it receives. Later cells
# reuse the *_v2 tables and must see only the month columns.
region_1_table_v3 = add_aggregated_columns(region_1_table_v2.copy())
region_2_table_v3 = add_aggregated_columns(region_2_table_v2.copy())
region_3_table_v3 = add_aggregated_columns(region_3_table_v2.copy())

region_1_table_v3

In [None]:
def add_aggregated_columns_v2(df):
    """Append ``FG``, ``JV`` and ``Cal`` aggregate columns to a month table.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose integer-labelled month columns hold strings of the form
        ``'<value> (<pct>%)'``. Columns 1, 2, 7 and 8 must be present.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with three extra string columns in the same
        ``'<value> (<pct>%)'`` format:

        * ``FG``  - column 1 + column 2
        * ``JV``  - column 7 + column 8
        * ``Cal`` - sum of all month columns (1-12) present

        ``<pct>`` is the change versus the previous row, in percent, rounded
        to two decimals (``nan`` for the first row). The input frame is not
        modified, so calling the function repeatedly gives the same result.
    """
    # Only the month columns contribute to the aggregates; selecting them
    # explicitly keeps FG/JV (or any other extra column) out of the yearly
    # total, which would otherwise count months 1, 2, 7 and 8 twice.
    month_cols = [month for month in range(1, 13) if month in df.columns]
    # The numeric value is the text before the first space in each cell.
    df_values = df[month_cols].apply(
        lambda col: col.map(lambda cell: float(cell.split(' ')[0]))
    )

    # Calculating aggregated values
    aggregated = pd.DataFrame({
        'FG': df_values[1] + df_values[2],
        'JV': df_values[7] + df_values[8],
        'Cal': df_values.sum(axis=1),
    })

    # Percentage change of each aggregate versus the previous row.
    df_pct_changes = aggregated.pct_change(fill_method=None) * 100

    # Work on a copy so the caller's table keeps its original columns.
    result = df.copy()
    for col in ['FG', 'JV', 'Cal']:
        result[col] = (
            aggregated[col].astype(str)
            + ' (' + df_pct_changes[col].round(2).astype(str) + '%)'
        )
    return result

# Adding aggregated columns to each region table using the updated function.
# Copies are passed so the *_v2 tables are guaranteed to stay untouched,
# whether or not the helper writes onto the frame it receives. This keeps
# the cell safe to re-run.
region_1_table_v4 = add_aggregated_columns_v2(region_1_table_v2.copy())
region_2_table_v4 = add_aggregated_columns_v2(region_2_table_v2.copy())
region_3_table_v4 = add_aggregated_columns_v2(region_3_table_v2.copy())

region_1_table_v4

In [None]:
# Small hand-written sample: three daily readings in January of 2015-2017,
# with every value supplied as text
data = {
    'date': ['2015-01-01', '2015-01-02', '2015-01-03', '2016-01-01', '2016-01-02', '2016-01-03', '2017-01-01', '2017-01-02', '2017-01-03'],
    'load_region_1': ['1000', '1100', '1200', '1050', '1150', '1250', '1100', '1200', '1300'],
    'load_region_2': ['2000', '2100', '2200', '2050', '2150', '2250', '2100', '2200', '2300'],
    'load_region_3': ['3000', '3100', '3200', '3050', '3150', '3250', '3100', '3200', '3300']
}

# Parse the dates, cast the three load columns to float, and index by date
df_sample = (
    pd.DataFrame(data)
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .astype({'load_region_1': float, 'load_region_2': float, 'load_region_3': float})
    .set_index('date')
)

df_sample.dtypes