In [174]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [175]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, Layer, LSTM
from keras.optimizers import Adam

In [176]:
import pandas as pd
import cufflinks as cf

# Настройка режима офлайн для работы с cufflinks и plotly
cf.go_offline()

# Загрузка данных из файла
data = pd.read_csv('Data/Sunspots.csv')
data['Date'] = pd.to_datetime(data['Date'])
data.set_index('Date', inplace=True)

# Визуализация данных (например, потребление энергии по глобальной активной мощности)
data['Monthly Mean Total Sunspot Number'].iplot(title='Monthly Mean Total Sunspot', xTitle='Date', yTitle='Monthly Mean Total Sunspot Number')

In [177]:
data.head()
data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3265 entries, 1749-01-31 to 2021-01-31
Data columns (total 2 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   Id                                 3265 non-null   int64  
 1   Monthly Mean Total Sunspot Number  3265 non-null   float64
dtypes: float64(1), int64(1)
memory usage: 76.5 KB


In [178]:
scaler = MinMaxScaler(feature_range=(0, 1))
data['Monthly Mean Total Sunspot Number'] = scaler.fit_transform(data['Monthly Mean Total Sunspot Number'].values.reshape(-1, 1))

In [179]:
train_size = int(len(data) * 0.67)
test_size = len(data) - train_size
train, test = data[0:train_size], data[train_size:len(data)]

In [180]:
look_back = 10
X_train, y_train = [], []
for i in range(look_back, len(train)):
    X_train.append(train['Monthly Mean Total Sunspot Number'][i - look_back:i])
    y_train.append(train['Monthly Mean Total Sunspot Number'][i])

X_train, y_train = np.array(X_train), np.array(y_train)

X_test, y_test = [], test['Monthly Mean Total Sunspot Number'].values
for i in range(look_back, len(test)):
    X_test.append(test['Monthly Mean Total Sunspot Number'][i - look_back:i])
    
X_test, y_test = np.array(X_test), np.array(y_test)


Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`



In [186]:
model = Sequential()
model.add(LSTM(4, input_shape=(look_back, 1), kernel_initializer='uniform', recurrent_initializer='uniform'))
model.add(Dense(1))

model.compile(loss='mean_absolute_error', optimizer='Adam')
model.fit(X_train, y_train, epochs=9, batch_size=1, verbose=2)

Epoch 1/9
2177/2177 - 8s - loss: 0.0581 - 8s/epoch - 4ms/step
Epoch 2/9
2177/2177 - 7s - loss: 0.0502 - 7s/epoch - 3ms/step
Epoch 3/9
2177/2177 - 7s - loss: 0.0487 - 7s/epoch - 3ms/step
Epoch 4/9
2177/2177 - 7s - loss: 0.0478 - 7s/epoch - 3ms/step
Epoch 5/9
2177/2177 - 7s - loss: 0.0472 - 7s/epoch - 3ms/step
Epoch 6/9
2177/2177 - 7s - loss: 0.0468 - 7s/epoch - 3ms/step
Epoch 7/9
2177/2177 - 7s - loss: 0.0467 - 7s/epoch - 3ms/step
Epoch 8/9
2177/2177 - 7s - loss: 0.0464 - 7s/epoch - 3ms/step
Epoch 9/9
2177/2177 - 7s - loss: 0.0463 - 7s/epoch - 3ms/step


<keras.callbacks.History at 0x2f49286b850>

In [182]:
train_predict = model.predict(X_train)
test_predict = model.predict(X_test)

inv_train_predict = scaler.inverse_transform(train_predict)
inv_y_train = scaler.inverse_transform(y_train.reshape(-1, 1))
inv_test_predict = scaler.inverse_transform(test_predict)
inv_y_test = scaler.inverse_transform(y_test[look_back:].reshape(-1, 1))

train_score = np.sqrt(mean_squared_error(inv_y_train, inv_train_predict[:,0]))
print('Train Score: %.2f RMSE' % (train_score))
test_score = np.sqrt(mean_squared_error(inv_y_test, inv_test_predict[:,0]))
print('Test Score: %.2f RMSE' % (test_score))

Train Score: 25.36 RMSE
Test Score: 25.37 RMSE


In [187]:
print(inv_y_test[:,0])
print(inv_test_predict[:,0])

[17.6 18.8 18.7 ... 34.  21.8 10.4]
[26.039047 23.084291 22.21784  ... 11.035759 22.68516  22.189802]


In [188]:
df_test = pd.DataFrame({
    'Y_test': inv_y_test.ravel(),
    'Predictions': inv_test_predict.ravel()
})
# Отобразить на графике
df_test.iplot(title="Прогноз vs Реальные значения", xTitle="Время", yTitle="Значение", theme="solar")