In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

In [3]:
# Load the cleaned household power-consumption data and index it by timestamp
# so the rows form a time series. pandas (`pd`) is already imported by the
# imports cell at the top of the notebook, so it is not re-imported here.
df = pd.read_csv(
    './data/household_power_consumption_cleaned.csv',
    parse_dates=['datetime'],
).set_index('datetime')

# Rolling-window lengths, counted in ROWS (not time units): .rolling(window=n)
# with an integer n averages the current row and the n - 1 rows before it.
# NOTE(review): the printed head shows rows one minute apart, so these look
# like 3- to 200-minute windows -- confirm the whole file is gap-free if the
# windows are meant to cover fixed time spans.
window_sizes = [3, 7, 14, 30, 60, 200]  # Short-term, medium-term, long-term trends

# Measurements that receive a moving-average column for every window size.
ma_columns = ['Global_active_power', 'Global_reactive_power', 'Voltage']

# Adds one '<column>_MA_<window>' column per (window, column) pair. Windows are
# the outer loop so the new columns stay grouped by window size. With an
# integer window the default min_periods equals the window length, so the
# first `window - 1` rows of each new column are NaN.
for window in window_sizes:
    for column in ma_columns:
        df[f'{column}_MA_{window}'] = df[column].rolling(window=window).mean()

# Display the first few rows to verify the calculations
print(df.head())

                     Global_active_power  Global_reactive_power  Voltage  \
datetime                                                                   
2006-12-16 17:24:00                4.216                  0.418   234.84   
2006-12-16 17:25:00                5.360                  0.436   233.63   
2006-12-16 17:26:00                5.374                  0.498   233.29   
2006-12-16 17:27:00                5.388                  0.502   233.74   
2006-12-16 17:28:00                3.666                  0.528   235.68   

                     Global_intensity  Sub_metering_1  Sub_metering_2  \
datetime                                                                
2006-12-16 17:24:00              18.4             0.0             1.0   
2006-12-16 17:25:00              23.0             0.0             1.0   
2006-12-16 17:26:00              23.0             0.0             2.0   
2006-12-16 17:27:00              23.0             0.0             1.0   
2006-12-16 17:28:00          

In [4]:
# Keep only rows with no missing value in any column. This removes the leading
# rows where the widest rolling window has not filled yet (their moving-average
# columns are NaN), along with any other row that contains a NaN.
df = df.dropna(axis='index', how='any')

In [5]:
# Persist the frame, moving-average columns included, as CSV. The datetime
# index is written as the first column (index=True is the pandas default).
df.to_csv(
    path_or_buf='./data/power_consumption_with_moving_averages.csv',
    index=True,
)

In [6]:
# Preview the first five rows; as the cell's last expression the result is
# shown with the notebook's rich DataFrame display.
df.head(n=5)

Unnamed: 0_level_0,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3,Global_active_power_MA_3,Global_reactive_power_MA_3,Voltage_MA_3,...,Voltage_MA_14,Global_active_power_MA_30,Global_reactive_power_MA_30,Voltage_MA_30,Global_active_power_MA_60,Global_reactive_power_MA_60,Voltage_MA_60,Global_active_power_MA_200,Global_reactive_power_MA_200,Voltage_MA_200
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2006-12-16 20:43:00,3.372,0.048,235.05,14.2,0.0,0.0,17.0,3.334,0.057333,234.703333,...,233.577857,3.273467,0.053733,233.428,3.288167,0.0752,233.121833,3.59198,0.10403,233.91385
2006-12-16 20:44:00,3.366,0.048,234.96,14.2,0.0,0.0,17.0,3.371333,0.048667,234.88,...,233.607857,3.272733,0.050667,233.484667,3.288133,0.073433,233.168833,3.58773,0.10218,233.91445
2006-12-16 20:45:00,3.386,0.05,235.77,14.2,0.0,0.0,17.0,3.374667,0.048667,235.26,...,233.691429,3.273267,0.049467,233.547,3.288733,0.071733,233.238833,3.57786,0.10025,233.92515
2006-12-16 20:46:00,3.496,0.146,236.76,14.8,0.0,0.0,17.0,3.416,0.081333,235.83,...,233.905,3.278733,0.054333,233.649667,3.291133,0.071633,233.323833,3.56847,0.09849,233.9425
2006-12-16 20:47:00,3.426,0.146,234.59,14.6,0.0,0.0,17.0,3.436,0.114,235.706667,...,233.982857,3.2826,0.0592,233.708333,3.292167,0.0715,233.363333,3.55866,0.09671,233.94675
