In [None]:
import sys
import csv
import torch
import importlib
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import torch.utils.data as data
import torch.nn.functional as F
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from BDDData import *
from joblib import Parallel, delayed

In [None]:
# Load only the columns needed for the per-turbine power analysis.
columns = ['TurbID', 'Day', 'Tmstamp', 'Patv']
df = pd.read_csv('raw_data/sdwpf_turb_information.csv', usecols=columns)
# Clamp negative power readings to zero. The assignment is restricted to the
# 'Patv' column on purpose: a bare boolean-mask assignment (df[mask] = 0)
# overwrites *every* column of the matching rows, which would also zero out
# TurbID, Day and Tmstamp and corrupt the later per-turbine grouping and
# timestamp construction.
df.loc[df['Patv'] < 0, 'Patv'] = 0
# Disabled alternative: treat zeros as missing values and interpolate linearly.
# df.replace(0, np.nan, inplace=True)
# df['Patv'] = df['Patv'].interpolate(method='linear')

In [None]:
# Code to determine ARIMA orders by checking stationarity and autocorrelation
# Take an explicit copy of the turbine-1 rows: adding a column to a
# boolean-filtered slice of df is chained assignment and raises
# SettingWithCopyWarning (and may or may not write through to df).
turbine_data = df[df['TurbID'] == 1].copy()

# Build a timestamp index from the day counter plus the time-of-day string,
# anchored at a fixed reference date.
# NOTE(review): assumes 'Day' is 1-based and 'Tmstamp' is an 'HH:MM' string
# (':00' is appended so pandas can parse it as a timedelta) - confirm against the CSV.
turbine_data['Datetime'] = (
    pd.to_datetime('2024-01-01')
    + pd.to_timedelta(turbine_data['Day'] - 1, unit='D')
    + pd.to_timedelta(turbine_data['Tmstamp'] + ':00')
)

# Non-inplace chaining keeps the cell idempotent on re-run.
turbine_data = turbine_data.set_index('Datetime').sort_index()

turbine_power = turbine_data['Patv']

# Augmented Dickey-Fuller test on the non-missing power values.
result = adfuller(turbine_power.dropna())
print('ADF Statistic:', result[0])
print('p-value:', result[1])

# If the p-value is above 0.05 the series is treated as non-stationary and
# differenced once; otherwise it is used as-is.
if result[1] > 0.05:
    turbine_power_diff = turbine_power.diff().dropna()
else:
    turbine_power_diff = turbine_power

In [None]:
# Two stacked panels: autocorrelation on top, partial autocorrelation below,
# both computed on the (possibly differenced) turbine power series.
fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(12, 8))
for draw_correlogram, panel in zip((plot_acf, plot_pacf), ax):
    # Drop any remaining NaNs before handing the series to statsmodels.
    draw_correlogram(turbine_power_diff.dropna(), lags=48, ax=panel)
plt.show()

In [None]:
# Plot the ACF (top) and PACF (bottom) of the raw power series for each turbine.
# The original `range(, 134)` was a syntax error; the start is set to 1 so the
# loop covers the same turbine IDs (1..133) as the stationarity loop further down.
for i in range(1, 134):
    turbine_data = df[df['TurbID'] == i]['Patv'].dropna()
    fig, ax = plt.subplots(2, 1, figsize=(12, 8))
    # Label each panel with the turbine ID so the figures can be told apart.
    plot_acf(turbine_data, lags=48, ax=ax[0], title=f'Turbine {i} - Autocorrelation')
    plot_pacf(turbine_data, lags=48, ax=ax[1], title=f'Turbine {i} - Partial Autocorrelation')
    # Render and release each figure inside the loop; otherwise all 133 figures
    # stay open at once and matplotlib warns about the number of open figures.
    plt.show()
    plt.close(fig)

In [None]:
# Run the Augmented Dickey-Fuller test on every turbine's power series and
# count how many are stationary at the 5% level.
stationary_count = 0
non_stationary_count = 0

for i in range(1, 134):
    # Explicit copy: adding a column to a boolean-filtered slice of df is
    # chained assignment and raises SettingWithCopyWarning.
    turbine_data = df[df['TurbID'] == i].copy()

    # Timestamp = fixed reference date + day offset + time of day.
    # NOTE(review): assumes 'Day' is 1-based and 'Tmstamp' is an 'HH:MM' string - confirm.
    turbine_data['Datetime'] = (
        pd.to_datetime('2024-01-01')
        + pd.to_timedelta(turbine_data['Day'] - 1, unit='D')
        + pd.to_timedelta(turbine_data['Tmstamp'] + ':00')
    )

    # Non-inplace chaining instead of mutating the per-turbine frame in place.
    turbine_data = turbine_data.set_index('Datetime').sort_index()

    turbine_power = turbine_data['Patv']

    result = adfuller(turbine_power.dropna())
    print(f'ID: {i}, ADF: {result[0]}, p-value: {result[1]}')

    # p-value below 0.05 is counted as stationary.
    if result[1] < 0.05:
        stationary_count += 1
    else:
        non_stationary_count += 1

print(stationary_count, non_stationary_count)

In [None]:
# Plot the ACF and PACF (with 95% confidence bands, alpha=0.05) of the power
# series for every turbine ID present in df.
for turbine_id in df['TurbID'].unique():
    turbine_data = df[df['TurbID'] == turbine_id]['Patv'].dropna()
    print(turbine_id)
    # plot_acf / plot_pacf each create and return their own figure when no
    # `ax` is given; titles carry the turbine ID so figures are identifiable.
    acf_fig = plot_acf(
        turbine_data, lags=48, alpha=0.05,
        title=f'Turbine {turbine_id} - Autocorrelation',
    )
    pacf_fig = plot_pacf(
        turbine_data, lags=48, alpha=0.05,
        title=f'Turbine {turbine_id} - Partial Autocorrelation',
    )
    # Show and release the figures per turbine. With a single plt.show() after
    # the loop, two figures per turbine pile up in memory before any is drawn.
    plt.show()
    plt.close(acf_fig)
    plt.close(pacf_fig)