In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import scipy.stats as st
import time

In [None]:
# Mount Google Drive so the workbook below can be read (Colab-only import).
from google.colab import drive
drive.mount('/content/drive')

# Data

In [None]:
# Load the 'CPU, Disk Resource Usage' sheet of the workbook into a DataFrame.
# NOTE(review): the Drive path is hardcoded, so the notebook only runs where this exact file exists.
resource=pd.read_excel('/content/drive/MyDrive/capstone_project/project_progress/ML/LSTM_autoencoder/CR IP _ Resource Usage.xlsx',sheet_name='CPU, Disk Resource Usage')

In [None]:
def cpu_used(x):
    """Strip the trailing percent sign from a raw ``cpu_used`` reading.

    Parameters
    ----------
    x : object
        A raw cell value. Strings such as ``"12.5%"`` are cleaned; anything
        that is not a string (floats, NaN, None) is passed through untouched.

    Returns
    -------
    object
        For strings, the text without surrounding whitespace and without a
        trailing ``%``; otherwise ``x`` itself. The result is still a string
        for string input - the caller converts it with ``astype(float)``.
    """
    # Only remove an actual '%' suffix. Blindly dropping the last character
    # would corrupt values that carry no suffix ("12.5" -> "12.").
    # presumably the suffix is always '%' - verify against the workbook.
    if isinstance(x, str):
        return x.strip().rstrip('%')
    return x

In [None]:
# Clean the raw `cpu_used` column with cpu_used() and convert the result to float.
resource['cpu_used_clean']=resource['cpu_used'].apply(cpu_used)
resource['cpu_used_clean']=resource['cpu_used_clean'].astype(float)

In [None]:
# Top 10 VMs by number of non-null cleaned CPU readings (shown as a single `count` column).
resource.groupby('vm_id').count().sort_values(by='cpu_used_clean',ascending=False).head(10)[['cpu_used_clean']].rename({'cpu_used_clean':'count'},axis=1)

# Function

In [None]:
def create_dataset(X,y,time_steps=1):
    """Cut a series into sliding windows and the value that follows each window.

    Parameters
    ----------
    X : positionally indexable frame (supports ``len`` and ``.iloc``)
        Source of the input windows.
    y : positionally indexable series (supports ``.iloc``)
        Source of the targets.
    time_steps : int, default 1
        Number of consecutive rows in each window.

    Returns
    -------
    tuple of numpy.ndarray
        ``(windows, targets)``. There are ``len(X) - time_steps`` windows;
        window ``i`` holds rows ``i .. i + time_steps - 1`` of ``X`` and its
        target is ``y`` at position ``i + time_steps``. Targets are reshaped
        to ``(n, 1, 1)``.
    """
    window_starts = range(len(X) - time_steps)
    windows = [X.iloc[s:s + time_steps].values for s in window_starts]
    targets = [y.iloc[s + time_steps] for s in window_starts]
    return np.array(windows), np.array(targets).reshape((len(targets), 1, 1))

In [None]:
def get_model(x_train,y_train):
    """Build and compile the LSTM encoder-decoder used in this notebook.

    Only the shapes of the arguments are read:
    ``x_train.shape[1]`` / ``x_train.shape[2]`` define the input window
    (steps, features) and ``y_train.shape[1]`` is the number of output steps
    the encoded vector is repeated for.

    Layer stack: LSTM(128, relu) -> Dropout(0.2) -> RepeatVector ->
    LSTM(128, relu, return_sequences=True) -> Dropout(0.2) ->
    TimeDistributed(Dense(features)).

    Returns
    -------
    keras.Sequential
        Model compiled with MAE loss, the Adam optimizer and the metrics
        ``mae`` and ``mape``.
    """
    steps_in, n_features = x_train.shape[1], x_train.shape[2]
    steps_out = y_train.shape[1]

    network = keras.Sequential([
        keras.layers.LSTM(units=128, activation='relu', input_shape=(steps_in, n_features)),
        keras.layers.Dropout(rate=0.2),
        keras.layers.RepeatVector(n=steps_out),
        keras.layers.LSTM(units=128, activation='relu', return_sequences=True),
        keras.layers.Dropout(rate=0.2),
        keras.layers.TimeDistributed(keras.layers.Dense(units=n_features)),
    ])
    network.compile(loss='mae', optimizer='adam', metrics=['mae', 'mape'])
    return network

In [None]:
def anomaly_df(y_test_inv, y_test_pred_inv,thres):
    """Tabulate actual vs. predicted values and flag anomalies.

    Parameters
    ----------
    y_test_inv : 1-D array-like
        Observed values.
    y_test_pred_inv : 1-D array-like
        Model predictions aligned with ``y_test_inv``.
    thres : number
        Threshold applied to ``actual - predicted``.

    Returns
    -------
    pandas.DataFrame
        Columns ``y_data``, ``y_predict``, ``difference`` (actual minus
        predicted), ``threshold`` and ``anomaly``. A row is an anomaly only
        when the actual value exceeds the prediction by more than ``thres``;
        negative differences are never flagged.
    """
    report = pd.DataFrame({'y_data': y_test_inv, 'y_predict': y_test_pred_inv})
    report['difference'] = report['y_data'] - report['y_predict']
    report['threshold'] = thres
    report['anomaly'] = report['difference'] > thres
    return report

def plot_anom(df_anom):
    """Plot actual and predicted series and mark flagged rows in red.

    ``df_anom`` is expected to carry the ``y_data``, ``y_predict`` and
    ``anomaly`` columns (as produced by ``anomaly_df``). Draws on the current
    matplotlib figure and shows it.
    """
    flagged = df_anom[df_anom.anomaly == True]
    plt.plot(df_anom.y_data, label='original data')
    plt.plot(df_anom.y_predict, label='autoencoder recreate')
    # Red markers sit on the observed values of the rows flagged as anomalies.
    plt.scatter(flagged.index, flagged.y_data, color='red')
    plt.legend()
    plt.show()

In [None]:
def scaler(train,test):
    """Standardise the ``cpu_usage`` column of two frames with one scaler.

    A ``StandardScaler`` is fitted on ``train['cpu_usage']`` only and then
    applied to both frames. Both ``train`` and ``test`` are modified in
    place: their ``cpu_usage`` column is overwritten with the scaled values.

    Returns
    -------
    StandardScaler
        The fitted scaler, so results can be mapped back later.
    """
    feature = ['cpu_usage']
    fitted = StandardScaler().fit(train[feature])
    # Order matters only if both names refer to the same frame: train first, then test.
    for frame in (train, test):
        frame['cpu_usage'] = fitted.transform(frame[feature])
    return fitted

In [None]:
def inverse_scaler(y,scaler_vm):
    """Undo the scaling of ``y`` and return the result as a flat 1-D array.

    ``y`` is reshaped to a single column before being handed to
    ``scaler_vm.inverse_transform``, so any array shape is accepted.
    """
    as_column = y.reshape(-1, 1)
    restored = scaler_vm.inverse_transform(as_column)
    return restored.flatten()

# VM ID 3775

In [None]:
# Select the rows of VM 3775, renumber them from 0, and expose the cleaned
# numeric column under the name `cpu_usage` used by the rest of the pipeline.
vm_id_3775=resource[resource.vm_id==3775].reset_index(drop=True)
vm_id_3775.rename({'cpu_used_clean':'cpu_usage'},axis=1,inplace=True)

In [None]:
# Quick look at the full CPU usage series of this VM.
plt.plot(vm_id_3775.cpu_usage)

## Test and Train Data

In [None]:
# Train/test split: the first 288*7 rows are used for training, the next 288*7 for testing.
# Half-open positional slices keep the two sets disjoint; label-based `.loc[a:b]`
# includes both endpoints, which put row 288*7 into both train and test.
# `.copy()` makes both frames independent of the VM frame, because they are
# scaled in place later (avoids SettingWithCopyWarning).
train = vm_id_3775.iloc[:288*7].copy()
test = vm_id_3775.iloc[288*7:288*7*2].copy()
time_steps = 30
# Spiked copy of the training data: the sample `time_steps` rows before the end is forced to 100.
train_dumy = train.copy()
train_dumy.loc[len(train_dumy)-time_steps, 'cpu_usage'] = 100
# Snapshot of the spiked data taken before `train_dumy` gets scaled.
train_dumy_norm = train_dumy.copy()

In [None]:
# Raw (unscaled) training series.
# NOTE(review): the title says 'First Day' but the split above takes 288*7 rows - confirm the intended unit.
plt.plot(train.cpu_usage)
plt.title('First Day CPU USAGE aka Train Data')
plt.ylabel('Cpu Usage')
plt.xlabel('Minutes')

In [None]:
# Raw (unscaled) test series.
plt.plot(test.cpu_usage)
plt.title('Second Day CPU USAGE aka Test Data')
plt.ylabel('cpu usage')
plt.xlabel('minutes')

In [None]:
# Fit a StandardScaler on the spiked training copy; `train_dumy` and `test` are scaled in place.
scaler_vm=scaler(train_dumy,test)

In [None]:
# Fit a second scaler on the still-unscaled spiked snapshot and use it to scale `train` in place,
# so `train` ends up on the same scale as `train_dumy`.
scaler_vm_dumy=scaler(train_dumy_norm,train)

## Windowed Data

In [None]:
TIME_STEPS=30 # learn from the habit: number of past samples in each input window
# Each target is the sample that immediately follows its window.
x_train,y_train = create_dataset(train[["cpu_usage"]],train.cpu_usage,TIME_STEPS)
x_test,y_test = create_dataset(test[["cpu_usage"]],test.cpu_usage,TIME_STEPS)

In [None]:
# Same windowing for the spiked training copy; these are the arrays the model is fitted on.
x_train_dum,y_train_dum = create_dataset(train_dumy[["cpu_usage"]],train_dumy.cpu_usage,TIME_STEPS)


## Fitting On First Day Data ( Train Data )

In [None]:
# Train on the spiked windows (last 10% held out for validation, no shuffling)
# and print the wall-clock training time in seconds.
model = get_model(x_train_dum,y_train_dum)
start=time.time()
history = model.fit(x_train_dum,y_train_dum, epochs =50 , batch_size=64,validation_split=0.1,shuffle = False)
print(time.time()-start)

## Predict on Train and Test Data

In [None]:
y_test_pred=model.predict(x_test)
y_train_pred=model.predict(x_train)

In [None]:
# Map scaled targets and predictions back to the original cpu_usage units.
y_train_inv=inverse_scaler(y_train,scaler_vm)
y_train_pred_inv=inverse_scaler(y_train_pred,scaler_vm)

y_test_inv=inverse_scaler(y_test,scaler_vm)
y_test_pred_inv=inverse_scaler(y_test_pred,scaler_vm)

In [None]:
# Actual vs. predicted on the training data.

plt.title('First Day Data aka Train Data')
plt.ylabel('cpu usage')
plt.xlabel('minutes')
plt.plot(y_train_inv,label='original data')
plt.plot(y_train_pred_inv,label='autoencoder recreate')
#plt.ylim(0,5)
plt.legend()
plt.show()

In [None]:
# Actual vs. predicted on the test data.
plt.title('Second Day Data aka Test Data')
plt.ylabel('cpu usage')
plt.xlabel('minutes')
plt.plot(y_test_inv,label='original data')
plt.plot(y_test_pred_inv,label='autoencoder recreate')

plt.legend()
plt.show()

In [None]:
# Flag test points whose actual value exceeds the prediction by more than `thres`.
thres=50
df_anom=anomaly_df(y_test_inv, y_test_pred_inv,thres)

In [None]:
plt.title('Anomaly Prediction')
plt.ylabel('cpu usage')
plt.xlabel('minutes')
plot_anom(df_anom)  # a point more than `thres` (50) above the learned habit is considered an anomaly

## Predict The Rest Of The Day (ERROR)

In [None]:
# Everything from row 288*7*2 onwards, i.e. the data after the train and test ranges.
rest_day=vm_id_3775.loc[288*7*2:]


In [None]:
# Raw (unscaled) remaining series.
plt.plot(rest_day.cpu_usage)
plt.title('The Rest of The Day CPU USAGE aka Test Data')
plt.ylabel('cpu usage')
plt.xlabel('minutes')

In [None]:
TIME_STEPS = 30  # learn from the habit: number of past samples in each input window
# Scale into a new frame instead of assigning onto `rest_day`: `rest_day` is a slice of
# the VM frame (assigning to it raises SettingWithCopyWarning), and overwriting it would
# re-scale already-scaled values every time this cell is re-run.
rest_day_scaled = rest_day.assign(
    cpu_usage=scaler_vm.transform(rest_day[['cpu_usage']]).ravel()
)
x_rest, y_rest = create_dataset(rest_day_scaled[["cpu_usage"]], rest_day_scaled.cpu_usage, TIME_STEPS)

In [None]:
# Predict the next value for every window of the remaining data.
y_rest_pred=model.predict(x_rest)

In [None]:
# Map scaled targets and predictions back to the original cpu_usage units.
y_rest_inv=inverse_scaler(y_rest,scaler_vm)
y_rest_pred_inv=inverse_scaler(y_rest_pred,scaler_vm)


In [None]:
# Actual vs. predicted on the remaining data. The title used to be a copy of the
# training plot's title ('First Day Data aka Train Data'), which mislabelled the figure.
plt.title('The Rest of The Day Data')
plt.ylabel('cpu usage')
plt.xlabel('minutes')
plt.plot(y_rest_inv,label='original data')
plt.plot(y_rest_pred_inv,label='autoencoder recreate')
plt.legend()
plt.show()

In [None]:
# Flag remaining-data points whose actual value exceeds the prediction by more than `thres`.
thres=40
df_anom=anomaly_df(y_rest_inv, y_rest_pred_inv,thres)

In [None]:
plt.title('Anomaly Prediction The Rest of The Day')
plt.ylabel('cpu usage')
plt.xlabel('minutes')
plot_anom(df_anom)  # a point more than `thres` (40 here) above the learned habit is considered an anomaly

# VM ID 4867

In [None]:
# Select the rows of VM 4867, renumber them from 0, and expose the cleaned column as `cpu_usage`.
# NOTE(review): the variable keeps the name `vm_id_3775` although it now holds VM 4867's rows.
vm_id_3775=resource[resource.vm_id==4867].reset_index(drop=True)
vm_id_3775.rename({'cpu_used_clean':'cpu_usage'},axis=1,inplace=True)

In [None]:
# Quick look at the full CPU usage series of this VM.
plt.plot(vm_id_3775.cpu_usage)

## Test and Train Data

In [None]:
# Train/test split: the first 288*7 rows are used for training, the next 288*7 for testing.
# Half-open positional slices keep the two sets disjoint; label-based `.loc[a:b]`
# includes both endpoints, which put row 288*7 into both train and test.
# `.copy()` makes both frames independent of the VM frame, because they are
# scaled in place later (avoids SettingWithCopyWarning).
train = vm_id_3775.iloc[:288*7].copy()
test = vm_id_3775.iloc[288*7:288*7*2].copy()
time_steps = 30
# Spiked copy of the training data: the sample `time_steps` rows before the end is forced to 100.
train_dumy = train.copy()
train_dumy.loc[len(train_dumy)-time_steps, 'cpu_usage'] = 100
# Snapshot of the spiked data taken before `train_dumy` gets scaled.
train_dumy_norm = train_dumy.copy()

In [None]:
# Raw (unscaled) training series.
# NOTE(review): the title says 'First Day' but the split above takes 288*7 rows - confirm the intended unit.
plt.plot(train.cpu_usage)
plt.title('First Day CPU USAGE aka Train Data')
plt.ylabel('Cpu Usage')
plt.xlabel('Minutes')

In [None]:
# Raw (unscaled) test series.
plt.plot(test.cpu_usage)
plt.title('Second Day CPU USAGE aka Test Data')
plt.ylabel('cpu usage')
plt.xlabel('minutes')

In [None]:
# Fit a StandardScaler on the spiked training copy; `train_dumy` and `test` are scaled in place.
scaler_vm=scaler(train_dumy,test)

In [None]:
# Fit a second scaler on the still-unscaled spiked snapshot and use it to scale `train` in place,
# so `train` ends up on the same scale as `train_dumy`.
scaler_vm_dumy=scaler(train_dumy_norm,train)

## Windowed Data

In [None]:
TIME_STEPS=30 # learn from the habit: number of past samples in each input window
# Each target is the sample that immediately follows its window.
x_train,y_train = create_dataset(train[["cpu_usage"]],train.cpu_usage,TIME_STEPS)
x_test,y_test = create_dataset(test[["cpu_usage"]],test.cpu_usage,TIME_STEPS)

In [None]:
# Same windowing for the spiked training copy; these are the arrays the model is fitted on.
x_train_dum,y_train_dum = create_dataset(train_dumy[["cpu_usage"]],train_dumy.cpu_usage,TIME_STEPS)


## Fitting On First Day Data ( Train Data )

In [None]:
# Train on the spiked windows (last 10% held out for validation, no shuffling)
# and print the wall-clock training time in seconds.
model = get_model(x_train_dum,y_train_dum)
start=time.time()
history = model.fit(x_train_dum,y_train_dum, epochs =50 , batch_size=64,validation_split=0.1,shuffle = False)
print(time.time()-start)

## Predict on Train and Test Data

In [None]:
y_test_pred=model.predict(x_test)
y_train_pred=model.predict(x_train)

In [None]:
# Map scaled targets and predictions back to the original cpu_usage units.
y_train_inv=inverse_scaler(y_train,scaler_vm)
y_train_pred_inv=inverse_scaler(y_train_pred,scaler_vm)

y_test_inv=inverse_scaler(y_test,scaler_vm)
y_test_pred_inv=inverse_scaler(y_test_pred,scaler_vm)

In [None]:
# Actual vs. predicted on the training data.

plt.title('First Day Data aka Train Data')
plt.ylabel('cpu usage')
plt.xlabel('minutes')
plt.plot(y_train_inv,label='original data')
plt.plot(y_train_pred_inv,label='autoencoder recreate')
#plt.ylim(0,5)
plt.legend()
plt.show()

In [None]:
# Actual vs. predicted on the test data.
plt.title('Second Day Data aka Test Data')
plt.ylabel('cpu usage')
plt.xlabel('minutes')
plt.plot(y_test_inv,label='original data')
plt.plot(y_test_pred_inv,label='autoencoder recreate')

plt.legend()
plt.show()

In [None]:
# Flag test points whose actual value exceeds the prediction by more than `thres`.
thres=50
df_anom=anomaly_df(y_test_inv, y_test_pred_inv,thres)

In [None]:
plt.title('Anomaly Prediction')
plt.ylabel('cpu usage')
plt.xlabel('minutes')
plot_anom(df_anom)  # no anomaly detected

## Predict The Rest Of The Day

In [None]:
# Everything from row 288*7*2 onwards, i.e. the data after the train and test ranges.
rest_day=vm_id_3775.loc[288*7*2:]


In [None]:
# How many rows are left for this VM.
rest_day.shape

In [None]:
# Raw (unscaled) remaining series.
plt.plot(rest_day.cpu_usage)
plt.title('The Rest of The Day CPU USAGE aka Test Data')
plt.ylabel('cpu usage')
plt.xlabel('minutes')

In [None]:
TIME_STEPS = 30  # learn from the habit: number of past samples in each input window
# Scale into a new frame instead of assigning onto `rest_day`: `rest_day` is a slice of
# the VM frame (assigning to it raises SettingWithCopyWarning), and overwriting it would
# re-scale already-scaled values every time this cell is re-run.
rest_day_scaled = rest_day.assign(
    cpu_usage=scaler_vm.transform(rest_day[['cpu_usage']]).ravel()
)
x_rest, y_rest = create_dataset(rest_day_scaled[["cpu_usage"]], rest_day_scaled.cpu_usage, TIME_STEPS)

In [None]:
# Predict the next value for every window of the remaining data.
y_rest_pred=model.predict(x_rest)

In [None]:
# Map scaled targets and predictions back to the original cpu_usage units.
y_rest_inv=inverse_scaler(y_rest,scaler_vm)
y_rest_pred_inv=inverse_scaler(y_rest_pred,scaler_vm)


In [None]:
# Actual vs. predicted on the remaining data. The title used to be a copy of the
# training plot's title ('First Day Data aka Train Data'), which mislabelled the figure.
plt.title('The Rest of The Day Data')
plt.ylabel('cpu usage')
plt.xlabel('minutes')
plt.plot(y_rest_inv,label='original data')
plt.plot(y_rest_pred_inv,label='autoencoder recreate')
plt.legend()
plt.show()

In [None]:
# Flag remaining-data points whose actual value exceeds the prediction by more than `thres`.
thres=40
df_anom=anomaly_df(y_rest_inv, y_rest_pred_inv,thres)

In [None]:
plt.title('Anomaly Prediction The Rest of The Day')
plt.ylabel('cpu usage')
plt.xlabel('minutes')
plot_anom(df_anom)  # a point more than `thres` (40 here) above the learned habit is considered an anomaly

# VM ID 3813

In [None]:
# Select the rows of VM 3813, renumber them from 0, and expose the cleaned column as `cpu_usage`.
# NOTE(review): the variable keeps the name `vm_id_3775` although it now holds VM 3813's rows.
vm_id_3775=resource[resource.vm_id==3813].reset_index(drop=True)
vm_id_3775.rename({'cpu_used_clean':'cpu_usage'},axis=1,inplace=True)

In [None]:
# Quick look at the full CPU usage series of this VM.
plt.plot(vm_id_3775.cpu_usage)

## Test and Train Data

In [None]:
# Train/test split: the first 288 rows are used for training, everything after for testing.
# Half-open positional slices keep the two sets disjoint; label-based `.loc[a:b]`
# includes both endpoints, which put row 288 into both train and test.
# `.copy()` makes both frames independent of the VM frame, because they are
# scaled in place later (avoids SettingWithCopyWarning).
train = vm_id_3775.iloc[:288].copy()
test = vm_id_3775.iloc[288:].copy()
time_steps = 30
# Spiked copy of the training data: the sample `time_steps` rows before the end is forced to 100.
train_dumy = train.copy()
train_dumy.loc[len(train_dumy)-time_steps, 'cpu_usage'] = 100
# Snapshot of the spiked data taken before `train_dumy` gets scaled.
train_dumy_norm = train_dumy.copy()

In [None]:
# Raw (unscaled) training series, with the y-axis clipped to [-1, 75].
plt.plot(train.cpu_usage)
plt.title('First Day CPU USAGE aka Train Data')
plt.ylim(-1,75)
plt.ylabel('Cpu Usage')
plt.xlabel('Minutes')

In [None]:
# Raw (unscaled) test series.
plt.plot(test.cpu_usage)
plt.title('Second Day CPU USAGE aka Test Data')
plt.ylabel('cpu usage')
plt.xlabel('minutes')

In [None]:
# Fit a StandardScaler on the spiked training copy; `train_dumy` and `test` are scaled in place.
scaler_vm=scaler(train_dumy,test)

In [None]:
# Fit a second scaler on the still-unscaled spiked snapshot and use it to scale `train` in place,
# so `train` ends up on the same scale as `train_dumy`.
scaler_vm_dumy=scaler(train_dumy_norm,train)

## Windowed Data

In [None]:
TIME_STEPS=30 # learn from the habit: number of past samples in each input window
# Each target is the sample that immediately follows its window.
x_train,y_train = create_dataset(train[["cpu_usage"]],train.cpu_usage,TIME_STEPS)
x_test,y_test = create_dataset(test[["cpu_usage"]],test.cpu_usage,TIME_STEPS)

In [None]:
# Same windowing for the spiked training copy.
x_train_dum,y_train_dum = create_dataset(train_dumy[["cpu_usage"]],train_dumy.cpu_usage,TIME_STEPS)


## Fitting On First Day Data ( Train Data )

In [None]:
# NOTE(review): this VM is fitted on the un-spiked windows (x_train / y_train) while the other
# VM sections fit on x_train_dum / y_train_dum - confirm whether that difference is intended.
# Prints the wall-clock training time in seconds.
model = get_model(x_train,y_train)
start=time.time()
history = model.fit(x_train,y_train, epochs =50 , batch_size=64,validation_split=0.1,shuffle = False)
print(time.time()-start)

## Predict on Train and Test Data

In [None]:
y_test_pred=model.predict(x_test)
y_train_pred=model.predict(x_train)

In [None]:
# Map scaled targets and predictions back to the original cpu_usage units.
y_train_inv=inverse_scaler(y_train,scaler_vm)
y_train_pred_inv=inverse_scaler(y_train_pred,scaler_vm)

y_test_inv=inverse_scaler(y_test,scaler_vm)
y_test_pred_inv=inverse_scaler(y_test_pred,scaler_vm)

In [None]:
# Actual vs. predicted on the training data.

plt.title('First Day Data aka Train Data')
plt.ylabel('cpu usage')
plt.xlabel('minutes')
plt.plot(y_train_inv,label='original data')
plt.plot(y_train_pred_inv,label='autoencoder recreate')
#plt.ylim(0,0.3)
plt.legend()
plt.show()

In [None]:
# Actual vs. predicted on the test data.
plt.title('Second Day Data aka Test Data')
plt.ylabel('cpu usage')
plt.xlabel('minutes')
plt.plot(y_test_inv,label='original data')
plt.plot(y_test_pred_inv,label='autoencoder recreate')
#plt.ylim(-2,1) #2hy
plt.legend()
plt.show()

In [None]:
# Flag test points whose actual value exceeds the prediction by more than `thres`.
thres=40
df_anom=anomaly_df(y_test_inv, y_test_pred_inv,thres)

In [None]:
plt.title('Anomaly Prediction')
plt.ylabel('cpu usage')
plt.xlabel('minutes')
plot_anom(df_anom)  # no anomaly detected

# VM ID 5564 (ERROR)

In [None]:
# Select the rows of VM 5564, renumber them from 0, and expose the cleaned column as `cpu_usage`.
# NOTE(review): the variable keeps the name `vm_id_3775` although it now holds VM 5564's rows.
vm_id_3775=resource[resource.vm_id==5564].reset_index(drop=True)
vm_id_3775.rename({'cpu_used_clean':'cpu_usage'},axis=1,inplace=True)

In [None]:
# Quick look at the full CPU usage series of this VM.
plt.plot(vm_id_3775.cpu_usage)

## Test and Train Data

In [None]:
# Train/test split: the first 150 rows are used for training, everything after for testing.
# Half-open positional slices keep the two sets disjoint; label-based `.loc[a:b]`
# includes both endpoints, which put row 150 into both train and test.
# `.copy()` makes both frames independent of the VM frame, because they are
# scaled in place later (avoids SettingWithCopyWarning).
time_steps = 30  # was assigned twice in this cell; once is enough
train = vm_id_3775.iloc[:150].copy()
test = vm_id_3775.iloc[150:].copy()
# Spiked copy of the training data: the sample `time_steps` rows before the end is forced to 100.
train_dumy = train.copy()
train_dumy.loc[len(train_dumy)-time_steps, 'cpu_usage'] = 100
# Snapshot of the spiked data taken before `train_dumy` gets scaled.
train_dumy_normal = train_dumy.copy()

In [None]:
# Raw (unscaled) training series.
plt.plot(train.cpu_usage)
plt.title('First Day CPU USAGE aka Train Data')
plt.ylabel('Cpu Usage')
plt.xlabel('Minutes')

In [None]:
# Training series with the injected spike (value 100) visible.
plt.plot(train_dumy.cpu_usage)
plt.title('First Day CPU USAGE aka Train Data')
plt.ylabel('Cpu Usage')
plt.xlabel('Minutes')

In [None]:
# Raw (unscaled) test series.
plt.plot(test.cpu_usage)
plt.title('Second Day CPU USAGE aka Test Data')
plt.ylabel('cpu usage')
plt.xlabel('minutes')

In [None]:
# Fit a StandardScaler on the spiked training copy; `train_dumy` and `test` are scaled in place.
scaler_vm=scaler(train_dumy,test)

In [None]:
# Fit a second scaler on the still-unscaled spiked snapshot and use it to scale `train` in place,
# so `train` ends up on the same scale as `train_dumy`.
scaler_vm_dumy=scaler(train_dumy_normal,train)

## Windowed Data

In [None]:
TIME_STEPS=30 # learn from the habit: number of past samples in each input window
# Each target is the sample that immediately follows its window.
x_train,y_train = create_dataset(train[["cpu_usage"]],train.cpu_usage,TIME_STEPS)
x_test,y_test = create_dataset(test[["cpu_usage"]],test.cpu_usage,TIME_STEPS)

In [None]:
# Same windowing for the spiked training copy; these are the arrays the model is fitted on.
x_train_dum,y_train_dum = create_dataset(train_dumy[["cpu_usage"]],train_dumy.cpu_usage,TIME_STEPS)


## Fitting On First Day Data ( Train Data )

In [None]:
# Train on the spiked windows (last 10% held out for validation, no shuffling)
# and print the wall-clock training time in seconds.
model = get_model(x_train_dum,y_train_dum)
start=time.time()
history = model.fit(x_train_dum,y_train_dum, epochs =50 , batch_size=64,validation_split=0.1,shuffle = False)
print(time.time()-start)

## Predict on Train and Test Data

In [None]:
y_test_pred=model.predict(x_test)
y_train_pred=model.predict(x_train)

In [None]:
# Map scaled targets and predictions back to the original cpu_usage units.
y_train_inv=inverse_scaler(y_train,scaler_vm)
y_train_pred_inv=inverse_scaler(y_train_pred,scaler_vm)

y_test_inv=inverse_scaler(y_test,scaler_vm)
y_test_pred_inv=inverse_scaler(y_test_pred,scaler_vm)

In [None]:
# Actual vs. predicted on the training data.

plt.title('First Day Data aka Train Data')
plt.ylabel('cpu usage')
plt.xlabel('minutes')
plt.plot(y_train_inv,label='original data')
plt.plot(y_train_pred_inv,label='autoencoder recreate')
#plt.ylim(0,5)
plt.legend()
plt.show()

In [None]:
# Actual vs. predicted on the test data.
plt.title('Second Day Data aka Test Data')
plt.ylabel('cpu usage')
plt.xlabel('minutes')
plt.plot(y_test_inv,label='original data')
plt.plot(y_test_pred_inv,label='autoencoder recreate')

plt.legend()
plt.show()

# VM ID 3616

In [None]:
# Select the rows of VM 3616, renumber them from 0, and expose the cleaned column as `cpu_usage`.
# NOTE(review): the variable keeps the name `vm_id_3775` although it now holds VM 3616's rows.
vm_id_3775=resource[resource.vm_id==3616].reset_index(drop=True)
vm_id_3775.rename({'cpu_used_clean':'cpu_usage'},axis=1,inplace=True)

In [None]:
# Quick look at the full CPU usage series of this VM.
plt.plot(vm_id_3775.cpu_usage)

## Test and Train Data

In [None]:
# Train/test split: the first 288 rows are used for training, everything after for testing.
# Half-open positional slices keep the two sets disjoint; label-based `.loc[a:b]`
# includes both endpoints, which put row 288 into both train and test.
# `.copy()` makes both frames independent of the VM frame, because they are
# scaled in place later (avoids SettingWithCopyWarning).
train = vm_id_3775.iloc[:288].copy()
test = vm_id_3775.iloc[288:].copy()
time_steps = 30
# Spiked copy of the training data: the sample `time_steps` rows before the end is forced to 100.
train_dumy = train.copy()
train_dumy.loc[len(train_dumy)-time_steps, 'cpu_usage'] = 100
# Snapshot of the spiked data taken before `train_dumy` gets scaled.
train_dumy_normal = train_dumy.copy()

In [None]:
# Raw (unscaled) training series.
plt.plot(train.cpu_usage)
plt.title('First Day CPU USAGE aka Train Data')
plt.ylabel('Cpu Usage')
plt.xlabel('Minutes')

In [None]:
# Raw (unscaled) test series.
plt.plot(test.cpu_usage)
plt.title('Second Day CPU USAGE aka Test Data')
plt.ylabel('cpu usage')
plt.xlabel('minutes')

In [None]:
# Fit a StandardScaler on the spiked training copy; `train_dumy` and `test` are scaled in place.
scaler_vm=scaler(train_dumy,test)

In [None]:
# Fit a second scaler on the still-unscaled spiked snapshot and use it to scale `train` in place,
# so `train` ends up on the same scale as `train_dumy`.
scaler_vm_dumy=scaler(train_dumy_normal,train)

## Windowed Data

In [None]:
TIME_STEPS=30 # learn from the habit: number of past samples in each input window
# Each target is the sample that immediately follows its window.
x_train,y_train = create_dataset(train[["cpu_usage"]],train.cpu_usage,TIME_STEPS)
x_test,y_test = create_dataset(test[["cpu_usage"]],test.cpu_usage,TIME_STEPS)

In [None]:
# Same windowing for the spiked training copy; these are the arrays the model is fitted on.
x_train_dum,y_train_dum = create_dataset(train_dumy[["cpu_usage"]],train_dumy.cpu_usage,TIME_STEPS)


## Fitting On First Day Data ( Train Data )

In [None]:
# Train on the spiked windows (last 10% held out for validation, no shuffling)
# and print the wall-clock training time in seconds.
model = get_model(x_train_dum,y_train_dum)
start=time.time()
history = model.fit(x_train_dum,y_train_dum, epochs =50 , batch_size=64,validation_split=0.1,shuffle = False)
print(time.time()-start)

## Predict on Train and Test Data

In [None]:
y_test_pred=model.predict(x_test)
y_train_pred=model.predict(x_train)

In [None]:
# Map scaled targets and predictions back to the original cpu_usage units.
y_train_inv=inverse_scaler(y_train,scaler_vm)
y_train_pred_inv=inverse_scaler(y_train_pred,scaler_vm)

y_test_inv=inverse_scaler(y_test,scaler_vm)
y_test_pred_inv=inverse_scaler(y_test_pred,scaler_vm)

In [None]:
# Actual vs. predicted on the training data.

plt.title('First Day Data aka Train Data')
plt.ylabel('cpu usage')
plt.xlabel('minutes')
plt.plot(y_train_inv,label='original data')
plt.plot(y_train_pred_inv,label='autoencoder recreate')
#plt.ylim(0,5)
plt.legend()
plt.show()

In [None]:
# Actual vs. predicted on the test data.
plt.title('Second Day Data aka Test Data')
plt.ylabel('cpu usage')
plt.xlabel('minutes')
plt.plot(y_test_inv,label='original data')
plt.plot(y_test_pred_inv,label='autoencoder recreate')

plt.legend()
plt.show()

# VM ID 3269

In [None]:
# Select the rows of VM 3269, renumber them from 0, and expose the cleaned column as `cpu_usage`.
# NOTE(review): the variable keeps the name `vm_id_3775` although it now holds VM 3269's rows.
vm_id_3775=resource[resource.vm_id==3269].reset_index(drop=True)
vm_id_3775.rename({'cpu_used_clean':'cpu_usage'},axis=1,inplace=True)

In [None]:
# Quick look at the full CPU usage series of this VM.
plt.plot(vm_id_3775.cpu_usage)

## Test and Train Data

In [None]:
# Train/test split: the first 288 rows are used for training, everything after for testing.
# Half-open positional slices keep the two sets disjoint; label-based `.loc[a:b]`
# includes both endpoints, which put row 288 into both train and test.
# `.copy()` makes both frames independent of the VM frame, because they are
# scaled in place later (avoids SettingWithCopyWarning).
train = vm_id_3775.iloc[:288].copy()
test = vm_id_3775.iloc[288:].copy()
time_steps = 30
# Spiked copy of the training data: the sample `time_steps` rows before the end is forced to 100.
train_dumy = train.copy()
train_dumy.loc[len(train_dumy)-time_steps, 'cpu_usage'] = 100
# Snapshot of the spiked data taken before `train_dumy` gets scaled.
train_dumy_normal = train_dumy.copy()

In [None]:
# Raw (unscaled) training series.
plt.plot(train.cpu_usage)
plt.title('First Day CPU USAGE aka Train Data')
plt.ylabel('Cpu Usage')
plt.xlabel('Minutes')

In [None]:
# Raw (unscaled) test series.
plt.plot(test.cpu_usage)
plt.title('Second Day CPU USAGE aka Test Data')
plt.ylabel('cpu usage')
plt.xlabel('minutes')

In [None]:
# Fit a StandardScaler on the spiked training copy; `train_dumy` and `test` are scaled in place.
scaler_vm=scaler(train_dumy,test)

In [None]:
# Fit a second scaler on the still-unscaled spiked snapshot and use it to scale `train` in place,
# so `train` ends up on the same scale as `train_dumy`.
scaler_vm_dumy=scaler(train_dumy_normal,train)

## Windowed Data

In [None]:
TIME_STEPS=30 # learn from the habit: number of past samples in each input window
# Each target is the sample that immediately follows its window.
x_train,y_train = create_dataset(train[["cpu_usage"]],train.cpu_usage,TIME_STEPS)
x_test,y_test = create_dataset(test[["cpu_usage"]],test.cpu_usage,TIME_STEPS)

In [None]:
# Same windowing for the spiked training copy; these are the arrays the model is fitted on.
x_train_dum,y_train_dum = create_dataset(train_dumy[["cpu_usage"]],train_dumy.cpu_usage,TIME_STEPS)


## Fitting On First Day Data ( Train Data )

In [None]:
# Train on the spiked windows (last 10% held out for validation, no shuffling)
# and print the wall-clock training time in seconds.
model = get_model(x_train_dum,y_train_dum)
start=time.time()
history = model.fit(x_train_dum,y_train_dum, epochs =50 , batch_size=64,validation_split=0.1,shuffle = False)
print(time.time()-start)

## Predict on Train and Test Data

In [None]:
y_test_pred=model.predict(x_test)
y_train_pred=model.predict(x_train)

In [None]:
# Map scaled targets and predictions back to the original cpu_usage units.
y_train_inv=inverse_scaler(y_train,scaler_vm)
y_train_pred_inv=inverse_scaler(y_train_pred,scaler_vm)

y_test_inv=inverse_scaler(y_test,scaler_vm)
y_test_pred_inv=inverse_scaler(y_test_pred,scaler_vm)

In [None]:
# Actual vs. predicted on the training data.

plt.title('First Day Data aka Train Data')
plt.ylabel('cpu usage')
plt.xlabel('minutes')
plt.plot(y_train_inv,label='original data')
plt.plot(y_train_pred_inv,label='autoencoder recreate')
#plt.ylim(0,5)
plt.legend()
plt.show()

In [None]:
# Actual vs. predicted on the test data.
plt.title('Second Day Data aka Test Data')
plt.ylabel('cpu usage')
plt.xlabel('minutes')
plt.plot(y_test_inv,label='original data')
plt.plot(y_test_pred_inv,label='autoencoder recreate')

plt.legend()
plt.show()

# VM ID 3658

In [None]:
# Select the rows of VM 3658, renumber them from 0, and expose the cleaned column as `cpu_usage`.
# NOTE(review): the variable keeps the name `vm_id_3775` although it now holds VM 3658's rows.
vm_id_3775=resource[resource.vm_id==3658].reset_index(drop=True)
vm_id_3775.rename({'cpu_used_clean':'cpu_usage'},axis=1,inplace=True)

In [None]:
# Quick look at the full CPU usage series of this VM.
plt.plot(vm_id_3775.cpu_usage)

## Test and Train Data

In [None]:
# Train/test split: the first 288 rows are used for training, everything after for testing.
# Half-open positional slices keep the two sets disjoint; label-based `.loc[a:b]`
# includes both endpoints, which put row 288 into both train and test.
# `.copy()` makes both frames independent of the VM frame, because they are
# scaled in place later (avoids SettingWithCopyWarning).
train = vm_id_3775.iloc[:288].copy()
test = vm_id_3775.iloc[288:].copy()
time_steps = 30
# Spiked copy of the training data: the sample `time_steps` rows before the end is forced to 100.
train_dumy = train.copy()
train_dumy.loc[len(train_dumy)-time_steps, 'cpu_usage'] = 100
# Snapshot of the spiked data taken before `train_dumy` gets scaled.
train_dumy_normal = train_dumy.copy()

In [None]:
# Raw (unscaled) training series.
plt.plot(train.cpu_usage)
plt.title('First Day CPU USAGE aka Train Data')
plt.ylabel('Cpu Usage')
plt.xlabel('Minutes')

In [None]:
# Raw (unscaled) test series.
plt.plot(test.cpu_usage)
plt.title('Second Day CPU USAGE aka Test Data')
plt.ylabel('cpu usage')
plt.xlabel('minutes')

In [None]:
# Fit a StandardScaler on the spiked training copy; `train_dumy` and `test` are scaled in place.
scaler_vm=scaler(train_dumy,test)

In [None]:
# Fit a second scaler on the still-unscaled spiked snapshot and use it to scale `train` in place,
# so `train` ends up on the same scale as `train_dumy`.
scaler_vm_dumy=scaler(train_dumy_normal,train)

## Windowed Data

In [None]:
TIME_STEPS=30 # learn from the habit: number of past samples in each input window
# Each target is the sample that immediately follows its window.
x_train,y_train = create_dataset(train[["cpu_usage"]],train.cpu_usage,TIME_STEPS)
x_test,y_test = create_dataset(test[["cpu_usage"]],test.cpu_usage,TIME_STEPS)

In [None]:
# Same windowing for the spiked training copy; these are the arrays the model is fitted on.
x_train_dum,y_train_dum = create_dataset(train_dumy[["cpu_usage"]],train_dumy.cpu_usage,TIME_STEPS)


## Fitting On First Day Data ( Train Data )

In [None]:
# Train on the spiked windows (last 10% held out for validation, no shuffling)
# and print the wall-clock training time in seconds.
model = get_model(x_train_dum,y_train_dum)
start=time.time()
history = model.fit(x_train_dum,y_train_dum, epochs =50 , batch_size=64,validation_split=0.1,shuffle = False)
print(time.time()-start)

## Predict on Train and Test Data

In [None]:
y_test_pred=model.predict(x_test)
y_train_pred=model.predict(x_train)

In [None]:
# Map scaled targets and predictions back to the original cpu_usage units.
y_train_inv=inverse_scaler(y_train,scaler_vm)
y_train_pred_inv=inverse_scaler(y_train_pred,scaler_vm)

y_test_inv=inverse_scaler(y_test,scaler_vm)
y_test_pred_inv=inverse_scaler(y_test_pred,scaler_vm)

In [None]:
# Actual vs. predicted on the training data.

plt.title('First Day Data aka Train Data')
plt.ylabel('cpu usage')
plt.xlabel('minutes')
plt.plot(y_train_inv,label='original data')
plt.plot(y_train_pred_inv,label='autoencoder recreate')
#plt.ylim(0,5)
plt.legend()
plt.show()

In [None]:
# Actual vs. predicted on the test data.
plt.title('Second Day Data aka Test Data')
plt.ylabel('cpu usage')
plt.xlabel('minutes')
plt.plot(y_test_inv,label='original data')
plt.plot(y_test_pred_inv,label='autoencoder recreate')

plt.legend()
plt.show()

# VM ID 3128

In [None]:
# Select the rows of VM 3128, renumber them from 0, and expose the cleaned column as `cpu_usage`.
# NOTE(review): the variable keeps the name `vm_id_3775` although it now holds VM 3128's rows.
vm_id_3775=resource[resource.vm_id==3128].reset_index(drop=True)
vm_id_3775.rename({'cpu_used_clean':'cpu_usage'},axis=1,inplace=True)

In [None]:
# Quick look at the full CPU usage series of this VM.
plt.plot(vm_id_3775.cpu_usage)

## Test and Train Data

In [None]:
# Train/test split: the first 40 rows are used for training, everything after for testing.
# Half-open positional slices keep the two sets disjoint; label-based `.loc[a:b]`
# includes both endpoints, which put row 40 into both train and test.
# `.copy()` makes both frames independent of the VM frame, because they are
# scaled in place later (avoids SettingWithCopyWarning).
train = vm_id_3775.iloc[:40].copy()
test = vm_id_3775.iloc[40:].copy()
time_steps = 30
# Spiked copy of the training data: the sample `time_steps` rows before the end is forced to 100.
train_dumy = train.copy()
train_dumy.loc[len(train_dumy)-time_steps, 'cpu_usage'] = 100
# Snapshot of the spiked data taken before `train_dumy` gets scaled.
train_dumy_normal = train_dumy.copy()

In [None]:
# Raw (unscaled) training series.
plt.plot(train.cpu_usage)
plt.title('First Day CPU USAGE aka Train Data')
plt.ylabel('Cpu Usage')
plt.xlabel('Minutes')

In [None]:
# Raw (unscaled) test series.
plt.plot(test.cpu_usage)
plt.title('Second Day CPU USAGE aka Test Data')
plt.ylabel('cpu usage')
plt.xlabel('minutes')

In [None]:
# Fit a StandardScaler on the spiked training copy; `train_dumy` and `test` are scaled in place.
scaler_vm=scaler(train_dumy,test)

In [None]:
# Fit a second scaler on the still-unscaled spiked snapshot and use it to scale `train` in place,
# so `train` ends up on the same scale as `train_dumy`.
scaler_vm_dumy=scaler(train_dumy_normal,train)

## Windowed Data

In [None]:
TIME_STEPS=30 # learn from the habit: number of past samples in each input window
# Each target is the sample that immediately follows its window.
x_train,y_train = create_dataset(train[["cpu_usage"]],train.cpu_usage,TIME_STEPS)
x_test,y_test = create_dataset(test[["cpu_usage"]],test.cpu_usage,TIME_STEPS)

In [None]:
# Same windowing for the spiked training copy; these are the arrays the model is fitted on.
x_train_dum,y_train_dum = create_dataset(train_dumy[["cpu_usage"]],train_dumy.cpu_usage,TIME_STEPS)


## Fitting On First Day Data ( Train Data )

In [None]:
# Train on the spiked windows (last 10% held out for validation, no shuffling)
# and print the wall-clock training time in seconds.
model = get_model(x_train_dum,y_train_dum)
start=time.time()
history = model.fit(x_train_dum,y_train_dum, epochs =50 , batch_size=64,validation_split=0.1,shuffle = False)
print(time.time()-start)

## Predict on Train and Test Data

In [None]:
y_test_pred=model.predict(x_test)
y_train_pred=model.predict(x_train)

In [None]:
# Map scaled targets and predictions back to the original cpu_usage units.
y_train_inv=inverse_scaler(y_train,scaler_vm)
y_train_pred_inv=inverse_scaler(y_train_pred,scaler_vm)

y_test_inv=inverse_scaler(y_test,scaler_vm)
y_test_pred_inv=inverse_scaler(y_test_pred,scaler_vm)

In [None]:
# Actual vs. predicted on the training data.

plt.title('First Day Data aka Train Data')
plt.ylabel('cpu usage')
plt.xlabel('minutes')
plt.plot(y_train_inv,label='original data')
plt.plot(y_train_pred_inv,label='autoencoder recreate')
#plt.ylim(0,5)
plt.legend()
plt.show()

In [None]:
# Actual vs. predicted on the test data.
plt.title('Second Day Data aka Test Data')
plt.ylabel('cpu usage')
plt.xlabel('minutes')
plt.plot(y_test_inv,label='original data')
plt.plot(y_test_pred_inv,label='autoencoder recreate')

plt.legend()
plt.show()

# VM ID 4866

In [None]:
# Select the rows of VM 4866, renumber them from 0, and expose the cleaned column as `cpu_usage`.
# NOTE(review): the variable keeps the name `vm_id_3775` although it now holds VM 4866's rows.
vm_id_3775=resource[resource.vm_id==4866].reset_index(drop=True)
vm_id_3775.rename({'cpu_used_clean':'cpu_usage'},axis=1,inplace=True)

In [None]:
# Quick look at the full CPU usage series of this VM.
plt.plot(vm_id_3775.cpu_usage)

## Test and Train Data

In [None]:
# Train/test split: the first 30 rows are used for training, everything after for testing.
# Half-open positional slices keep the two sets disjoint; label-based `.loc[a:b]`
# includes both endpoints, which put row 30 into both train and test.
# `.copy()` makes both frames independent of the VM frame, because they are
# scaled in place later (avoids SettingWithCopyWarning).
train = vm_id_3775.iloc[:30].copy()
test = vm_id_3775.iloc[30:].copy()
time_steps = 5
# Spiked copy of the training data: the sample `time_steps` rows before the end is forced to 100.
train_dumy = train.copy()
train_dumy.loc[len(train_dumy)-time_steps, 'cpu_usage'] = 100
# Snapshot of the spiked data taken before `train_dumy` gets scaled.
train_dumy_normal = train_dumy.copy()

In [None]:
# Raw (unscaled) training series.
plt.plot(train.cpu_usage)
plt.title('First Day CPU USAGE aka Train Data')
plt.ylabel('Cpu Usage')
plt.xlabel('Minutes')

In [None]:
# Raw (unscaled) test series.
plt.plot(test.cpu_usage)
plt.title('Second Day CPU USAGE aka Test Data')
plt.ylabel('cpu usage')
plt.xlabel('minutes')

In [None]:
# Fit a StandardScaler on the spiked training copy; `train_dumy` and `test` are scaled in place.
scaler_vm=scaler(train_dumy,test)

In [None]:
# Fit a second scaler on the still-unscaled spiked snapshot and use it to scale `train` in place,
# so `train` ends up on the same scale as `train_dumy`.
scaler_vm_dumy=scaler(train_dumy_normal,train)

## Windowed Data

In [None]:
TIME_STEPS=5 # learn from the habit: number of past samples in each input window (shorter here than the 30 used for the other VMs)
# Each target is the sample that immediately follows its window.
x_train,y_train = create_dataset(train[["cpu_usage"]],train.cpu_usage,TIME_STEPS)
x_test,y_test = create_dataset(test[["cpu_usage"]],test.cpu_usage,TIME_STEPS)

In [None]:
# Same windowing for the spiked training copy; these are the arrays the model is fitted on.
x_train_dum,y_train_dum = create_dataset(train_dumy[["cpu_usage"]],train_dumy.cpu_usage,TIME_STEPS)


In [None]:
# Sanity check of the training array shape.
x_train_dum.shape

## Fitting On First Day Data ( Train Data )

In [None]:
# Train on the spiked windows (last 10% held out for validation, no shuffling)
# and print the wall-clock training time in seconds.
model = get_model(x_train_dum,y_train_dum)
start=time.time()
history = model.fit(x_train_dum,y_train_dum, epochs =50 , batch_size=64,validation_split=0.1,shuffle = False)
print(time.time()-start)

## Predict on Train and Test Data

In [None]:
y_test_pred=model.predict(x_test)
y_train_pred=model.predict(x_train)

In [None]:
# Map scaled targets and predictions back to the original cpu_usage units.
y_train_inv=inverse_scaler(y_train,scaler_vm)
y_train_pred_inv=inverse_scaler(y_train_pred,scaler_vm)

y_test_inv=inverse_scaler(y_test,scaler_vm)
y_test_pred_inv=inverse_scaler(y_test_pred,scaler_vm)

In [None]:
# Actual vs. predicted on the training data.

plt.title('First Day Data aka Train Data')
plt.ylabel('cpu usage')
plt.xlabel('minutes')
plt.plot(y_train_inv,label='original data')
plt.plot(y_train_pred_inv,label='autoencoder recreate')
#plt.ylim(0,5)
plt.legend()
plt.show()

In [None]:
# Actual vs. predicted on the test data.
plt.title('Second Day Data aka Test Data')
plt.ylabel('cpu usage')
plt.xlabel('minutes')
plt.plot(y_test_inv,label='original data')
plt.plot(y_test_pred_inv,label='autoencoder recreate')

plt.legend()
plt.show()

# Function

In [None]:
def create_dataset(X,y,time_steps=1):
    """Cut a series into sliding windows and the value that follows each window.

    This re-defines, with the same behaviour, the ``create_dataset`` that
    appears earlier in the notebook.

    Parameters
    ----------
    X : positionally indexable frame (supports ``len`` and ``.iloc``)
        Source of the input windows.
    y : positionally indexable series (supports ``.iloc``)
        Source of the targets.
    time_steps : int, default 1
        Number of consecutive rows in each window.

    Returns
    -------
    tuple of numpy.ndarray
        ``(windows, targets)``. There are ``len(X) - time_steps`` windows;
        window ``i`` holds rows ``i .. i + time_steps - 1`` of ``X`` and its
        target is ``y`` at position ``i + time_steps``. Targets are reshaped
        to ``(n, 1, 1)``.
    """
    window_starts = range(len(X) - time_steps)
    windows = [X.iloc[s:s + time_steps].values for s in window_starts]
    targets = [y.iloc[s + time_steps] for s in window_starts]
    return np.array(windows), np.array(targets).reshape((len(targets), 1, 1))

In [None]:
def get_model(x_train,y_train):
    """Build and compile the LSTM encoder-decoder that predicts ``y_train``-length output.

    This re-defines, with the same layers, the ``get_model`` that appears
    earlier in the notebook. Only the shapes of the arguments are read:
    ``x_train.shape[1]`` / ``x_train.shape[2]`` define the input window
    (steps, features) and ``y_train.shape[1]`` is the number of output steps.

    Returns
    -------
    keras.Sequential
        Model compiled with MAE loss, the Adam optimizer and the metrics
        ``mae`` and ``mape``.
    """
    steps_in, n_features = x_train.shape[1], x_train.shape[2]
    steps_out = y_train.shape[1]

    network = keras.Sequential([
        keras.layers.LSTM(units=128, activation='relu', input_shape=(steps_in, n_features)),
        keras.layers.Dropout(rate=0.2),
        keras.layers.RepeatVector(n=steps_out),
        keras.layers.LSTM(units=128, activation='relu', return_sequences=True),
        keras.layers.Dropout(rate=0.2),
        keras.layers.TimeDistributed(keras.layers.Dense(units=n_features)),
    ])
    network.compile(loss='mae', optimizer='adam', metrics=['mae', 'mape'])
    return network

In [None]:
def get_model_recreate(x_train,y_train):
    """Build and compile an LSTM autoencoder that reproduces its input window.

    Same layer stack as ``get_model`` except that the encoded vector is
    repeated ``x_train.shape[1]`` times, so the output has as many time steps
    as the input window. ``y_train`` is accepted but not used.

    Returns
    -------
    keras.Sequential
        Model compiled with MAE loss, the Adam optimizer and the metrics
        ``mae`` and ``mape``.
    """
    steps_in, n_features = x_train.shape[1], x_train.shape[2]

    network = keras.Sequential([
        keras.layers.LSTM(units=128, activation='relu', input_shape=(steps_in, n_features)),
        keras.layers.Dropout(rate=0.2),
        # Decoder emits one step per input step (reconstruction rather than next-step prediction).
        keras.layers.RepeatVector(n=steps_in),
        keras.layers.LSTM(units=128, activation='relu', return_sequences=True),
        keras.layers.Dropout(rate=0.2),
        keras.layers.TimeDistributed(keras.layers.Dense(units=n_features)),
    ])
    network.compile(loss='mae', optimizer='adam', metrics=['mae', 'mape'])
    return network

In [None]:
def anomaly_df(y_test_inv, y_test_pred_inv,thres):
    """Tabulate actual vs. predicted values and flag anomalies.

    This re-defines, with the same behaviour, the ``anomaly_df`` that appears
    earlier in the notebook.

    Returns
    -------
    pandas.DataFrame
        Columns ``y_data``, ``y_predict``, ``difference`` (actual minus
        predicted), ``threshold`` and ``anomaly``. A row is an anomaly only
        when the actual value exceeds the prediction by more than ``thres``;
        negative differences are never flagged.
    """
    report = pd.DataFrame({'y_data': y_test_inv, 'y_predict': y_test_pred_inv})
    report['difference'] = report['y_data'] - report['y_predict']
    report['threshold'] = thres
    report['anomaly'] = report['difference'] > thres
    return report

def plot_anom(df_anom):
    """Plot actual and predicted series and mark flagged rows in red.

    ``df_anom`` is expected to carry the ``y_data``, ``y_predict`` and
    ``anomaly`` columns (as produced by ``anomaly_df``). Draws on the current
    matplotlib figure and shows it.
    """
    flagged = df_anom[df_anom.anomaly == True]
    plt.plot(df_anom.y_data, label='original data')
    plt.plot(df_anom.y_predict, label='autoencoder recreate')
    # Red markers sit on the observed values of the rows flagged as anomalies.
    plt.scatter(flagged.index, flagged.y_data, color='red')
    plt.legend()
    plt.show()

In [None]:
def scaler(train_sample,test_sample):
    """Standardise the ``cpu_usage`` column of two frames with one scaler.

    A ``StandardScaler`` is fitted on ``train_sample['cpu_usage']`` only and
    then applied to both frames. Both frames are modified in place: their
    ``cpu_usage`` column is overwritten with the scaled values.

    Returns
    -------
    StandardScaler
        The fitted scaler, so results can be mapped back later.
    """
    feature = ['cpu_usage']
    fitted = StandardScaler().fit(train_sample[feature])
    # Order matters only if both names refer to the same frame: train first, then test.
    for frame in (train_sample, test_sample):
        frame['cpu_usage'] = fitted.transform(frame[feature])
    return fitted

In [None]:
def inverse_scaler(y,scaler_vm):
    """Undo the scaling of ``y`` and return the result as a flat 1-D array.

    ``y`` is reshaped to a single column before being handed to
    ``scaler_vm.inverse_transform``, so any array shape is accepted.
    """
    as_column = y.reshape(-1, 1)
    restored = scaler_vm.inverse_transform(as_column)
    return restored.flatten()

# Generated Data

## Test and Train Data

In [None]:
# Seed NumPy's global RNG so the synthetic series, and everything derived from them,
# are identical on every Restart & Run All.
np.random.seed(42)
# Synthetic training series: level 5, a burst at level 100, back to 5, then level 7 (std 0.1 each).
train=np.concatenate([np.random.normal(5,0.1,60),np.random.normal(100,0.1,60),np.random.normal(5,0.1,100),np.random.normal(7,0.1,100)])
# Synthetic test series with level shifts to 6, 30, 20 and 40.
test=np.concatenate([np.random.normal(5,0.1,30),np.random.normal(6,0.1,100),np.random.normal(30,0.1,60),np.random.normal(20,0.1,100),np.random.normal(40,0.1,100)])
train=pd.DataFrame({'cpu_usage':train})
test=pd.DataFrame({'cpu_usage':test})
# Independent unscaled snapshot of the training frame, taken before `train` is scaled in place.
train_normal=train.copy(deep=True)

In [None]:
# Second synthetic test series: level 5, then 6, a small shift up to 9, then back to 6 (std 0.1 each).
test_2=np.concatenate([np.random.normal(5,0.1,30),np.random.normal(6,0.1,100),np.random.normal(9,0.1,60),np.random.normal(6,0.1,100)])
test_2=pd.DataFrame({'cpu_usage':test_2})

In [None]:
# Visual check of the three synthetic series.
plt.plot(train.cpu_usage)

In [None]:
plt.plot(test.cpu_usage)

In [None]:
plt.plot(test_2.cpu_usage)

In [None]:
# Fit a StandardScaler on `train`; `train` and `test` are scaled in place.
scaler_vm=scaler(train,test)

In [None]:
# Fit a second scaler on the unscaled snapshot `train_normal` and scale `test_2` in place
# (`train_normal` is scaled in place as well).
scaler_vm_2=scaler(train_normal,test_2)

## Windowed Data

In [None]:
TIME_STEPS=30 # learn from the habit: number of past samples in each input window
# Each target is the sample that immediately follows its window.
x_train,y_train = create_dataset(train[["cpu_usage"]],train.cpu_usage,TIME_STEPS)
x_test,y_test = create_dataset(test[["cpu_usage"]],test.cpu_usage,TIME_STEPS)
x_test_2,y_test_2 = create_dataset(test_2[["cpu_usage"]],test_2.cpu_usage,TIME_STEPS)

## Fitting On First Day Data ( Train Data )

In [None]:
# Train on the synthetic training windows (last 10% held out for validation, no shuffling)
# and print the wall-clock training time in seconds.
model = get_model(x_train,y_train)
start=time.time()
history = model.fit(x_train,y_train, epochs =50 , batch_size=64,validation_split=0.1,shuffle = False)
print(time.time()-start)

## Predict on Train and Test Data

In [None]:
y_test_pred=model.predict(x_test)
y_train_pred=model.predict(x_train)
y_test_pred_2=model.predict(x_test_2)

In [None]:
# Map scaled targets and predictions back to the original units.
y_train_inv=inverse_scaler(y_train,scaler_vm)
y_train_pred_inv=inverse_scaler(y_train_pred,scaler_vm)

y_test_inv=inverse_scaler(y_test,scaler_vm)
y_test_pred_inv=inverse_scaler(y_test_pred,scaler_vm)


# The second test series was scaled with `scaler_vm_2`, so it is inverted with that scaler.
y_test_inv_2=inverse_scaler(y_test_2,scaler_vm_2)
y_test_pred_inv_2=inverse_scaler(y_test_pred_2,scaler_vm_2)

In [None]:
# Actual vs. predicted on the training data.

plt.title('First Day Data aka Train Data')
plt.ylabel('cpu usage')
plt.xlabel('minutes')
plt.plot(y_train_inv,label='original data')
plt.plot(y_train_pred_inv,label='autoencoder recreate')
#plt.ylim(0,5)
plt.legend()
plt.show()

In [None]:
# Actual vs. predicted on the first test series.
plt.title('Second Day Data aka Test Data')
plt.ylabel('cpu usage')
plt.xlabel('minutes')
plt.plot(y_test_inv,label='original data')
plt.plot(y_test_pred_inv,label='autoencoder recreate')
plt.legend()
plt.show()

In [None]:
# Actual vs. predicted on the second test series.
plt.title('Second Day Data aka Test Data')
plt.ylabel('cpu usage')
plt.xlabel('minutes')
plt.plot(y_test_inv_2,label='original data')
plt.plot(y_test_pred_inv_2,label='autoencoder recreate')
plt.legend()
plt.show()
# Observation: when the level shift (delta) equals 3, there is a jump in the graph.