In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import scipy.stats as st
import time

In [None]:
# Colab-only setup: mount Google Drive so the /content/drive/MyDrive/... paths used below resolve.
from google.colab import drive
drive.mount('/content/drive')

# Data

In [None]:
# Load the per-VM resource-usage sheet from the mounted Drive folder.
resource = pd.read_excel(
    '/content/drive/MyDrive/capstone_project/project_progress/ML/LSTM_autoencoder/CR IP _ Resource Usage.xlsx',
    sheet_name='CPU, Disk Resource Usage',
)

In [None]:
def cpu_used(x):
    """Drop a single trailing unit character from a CPU-usage string.

    Parameters
    ----------
    x : Any
        A raw cell value. Strings are expected to look like ``"<number><unit>"``
        (presumably a ``'%'`` suffix -- confirm against the spreadsheet).

    Returns
    -------
    Any
        For a non-empty string whose last character is not a digit, the string
        without that last character. Every other value (numbers, ``None``/NaN,
        empty strings, strings that already end in a digit) is returned unchanged.
    """
    # Only strip when there is actually a non-numeric suffix; slicing
    # unconditionally would turn an already clean "45" into "4".
    if isinstance(x, str) and x and not x[-1].isdigit():
        return x[:-1]
    # Non-string values (floats, NaN, None) pass through as before, without
    # relying on a bare ``except`` to swallow the slicing error.
    return x

In [None]:
# Clean the raw cpu_used column with cpu_used() and parse the result as float.
resource['cpu_used_clean'] = resource['cpu_used'].apply(cpu_used).astype(float)

In [None]:
# Display the loaded frame, now including the cpu_used_clean column.
resource

In [None]:
# Ten VMs with the most non-null cpu_used_clean readings.
(
    resource
    .groupby('vm_id')
    .count()
    .sort_values(by='cpu_used_clean', ascending=False)
    .head(10)[['cpu_used_clean']]
    .rename({'cpu_used_clean': 'count'}, axis=1)
)

# Function

In [None]:
def create_dataset(X,y,time_steps=1):
    """Slice a series into sliding input windows and next-step targets.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature frame; consecutive ``time_steps`` rows form one input window.
    y : pandas.Series
        Target series; the value right after each window is its label.
    time_steps : int, default 1
        Number of rows per window.

    Returns
    -------
    tuple of numpy.ndarray
        ``(windows, targets)``. ``targets`` is reshaped to
        ``(n_windows, 1, 1)``; ``windows`` stacks the ``.values`` of each slice.
    """
    window_starts = range(len(X) - time_steps)
    windows = [X.iloc[s:s + time_steps].values for s in window_starts]
    targets = [y.iloc[s + time_steps] for s in window_starts]
    return np.array(windows), np.array(targets).reshape((len(targets), 1, 1))

In [None]:
def get_model(x_train,y_train):
    """Build and compile the LSTM -> RepeatVector -> LSTM sequence model.

    Parameters
    ----------
    x_train : array-like
        Only its shape is used: ``shape[1]`` is the window length and
        ``shape[2]`` the number of features.
    y_train : array-like
        Only ``shape[1]`` is used, as the number of repeated output steps.

    Returns
    -------
    keras.Sequential
        Model compiled with MAE loss, the Adam optimizer, and MAE/MAPE metrics.
    """
    window_len, n_features = x_train.shape[1], x_train.shape[2]
    n_out_steps = y_train.shape[1]

    model = keras.Sequential([
        keras.layers.LSTM(units=128, activation='relu', input_shape=(window_len, n_features)),
        keras.layers.Dropout(rate=0.2),
        # Repeat the LSTM's final output once per output step.
        keras.layers.RepeatVector(n=n_out_steps),
        keras.layers.LSTM(units=128, activation='relu', return_sequences=True),
        keras.layers.Dropout(rate=0.2),
        # One Dense projection per output step, back to the feature dimension.
        keras.layers.TimeDistributed(keras.layers.Dense(units=n_features)),
    ])

    model.compile(loss='mae', optimizer='adam', metrics=['mae', 'mape'])
    return model

In [None]:
def anomaly_df(y_test_inv, y_test_pred_inv,thres):
  """Tabulate actual vs. predicted values and flag points above a threshold.

  Returns a DataFrame with columns ``y_data``, ``y_predict``, ``difference``
  (actual minus predicted), ``threshold`` (constant ``thres``) and ``anomaly``
  (True where ``difference`` is strictly greater than ``thres``). Only positive
  deviations are flagged, because the signed difference is compared.
  """
  report = pd.DataFrame({'y_data': y_test_inv, 'y_predict': y_test_pred_inv})
  report['difference'] = report['y_data'] - report['y_predict']
  report['threshold'] = thres
  report['anomaly'] = report['difference'] > thres
  return report

def plot_anom(df_anom):
  """Plot actual and predicted series and mark flagged points in red.

  ``df_anom`` is expected to carry the ``y_data``, ``y_predict`` and
  ``anomaly`` columns, as produced by ``anomaly_df``.
  """
  flagged = df_anom[df_anom.anomaly==True]
  plt.plot(df_anom.y_data, label='original data')
  plt.plot(df_anom.y_predict, label='autoencoder recreate')
  # Overlay the anomalous observations on the original series.
  plt.scatter(flagged.index, flagged.y_data, color='red')
  plt.legend()
  plt.show()

In [None]:
def scaler(train,test):
  """Standardize the ``cpu_usage`` column of both frames in place.

  The scaler is fitted on ``train`` only and then applied to ``test``.
  Both frames are modified in place; the fitted ``StandardScaler`` is returned
  so predictions can be mapped back to original units.
  """
  fitted = StandardScaler()
  train['cpu_usage'] = fitted.fit_transform(train[['cpu_usage']])
  test['cpu_usage'] = fitted.transform(test[['cpu_usage']])
  return fitted

In [None]:
def inverse_scaler(y,scaler_vm):
  """Map scaled values back to original units and return them as a 1-D array.

  ``y`` is reshaped to a single column before being passed to
  ``scaler_vm.inverse_transform``; the result is flattened.
  """
  as_column = y.reshape(-1, 1)
  restored = scaler_vm.inverse_transform(as_column)
  return restored.flatten()

In [None]:
def model_pred(model,data_2d,TIME_STEPS):
  """Run ``model.predict`` on one window.

  ``data_2d`` is reshaped to a single-sample batch of shape
  ``(1, TIME_STEPS, 1)`` before prediction; the model's output is returned as is.
  """
  single_batch = data_2d.reshape(1, TIME_STEPS, 1)
  return model.predict(single_batch)

# Generated Data

## Test and Train Data

In [None]:
# Synthetic training series: five 200-sample plateaus (levels 8, 5, 100, 6, 7),
# each with Gaussian noise of standard deviation 0.1.
# Seed NumPy's global generator so the series (and the np.random draws in the
# cells that follow) are reproducible on Restart & Run All.
np.random.seed(42)
plateau_levels = [8, 5, 100, 6, 7]
cpu_usage_values = np.concatenate([np.random.normal(level, 0.1, 200) for level in plateau_levels])
train = pd.DataFrame({'cpu_usage': cpu_usage_values})


In [None]:
# Quick look at the generated series before it is scaled in the next cell.
plt.plot(train.cpu_usage)

In [None]:
# Fit the scaler on the training column and overwrite that column with its
# standardized values. scaler_a is kept to scale test windows and to invert
# predictions later.
# NOTE: this overwrites train['cpu_usage'], so re-running the cell rescales
# already-scaled data.
scaler_a = StandardScaler()
train['cpu_usage'] = scaler_a.fit_transform(train[['cpu_usage']])

## Windowed Data

In [None]:
# Window length: number of past samples the model sees before predicting the next one.
TIME_STEPS = 30
x_train, y_train = create_dataset(
    train[["cpu_usage"]],
    train.cpu_usage,
    TIME_STEPS,
)


## Fitting On First Day Data ( Train Data )

In [None]:
# Train on the windowed series and print the wall-clock training time in seconds.
model = get_model(x_train, y_train)
start = time.time()
history = model.fit(
    x_train,
    y_train,
    epochs=50,
    batch_size=64,
    validation_split=0.1,
    shuffle=False,  # keep the windows in chronological order
)
print(time.time() - start)

## Predict on Train and Test Data

### Constant Trend

In [None]:
# Constant level: for each level 10, 20, ..., 100 build one window of 30 noisy
# samples (standard deviation 0.1) and print the prediction in original units.
TIME_STEPS = 30
for level in range(10, 110, 10):
  test = np.random.normal(level, 0.1, 30).reshape(-1, 1)
  pred = model_pred(model, scaler_a.transform(test), TIME_STEPS)
  print(level, ':', scaler_a.inverse_transform(pred.reshape(-1, 1)))

### Half 5, half 10

In [None]:
# Step up: first 15 samples near 5, last 15 samples near 10.
TIME_STEPS = 30
test = np.concatenate([
    np.random.normal(5, 0.1, 15),
    np.random.normal(10, 0.1, 15),
]).reshape(-1, 1)
pred = model_pred(model, scaler_a.transform(test), TIME_STEPS)
print(scaler_a.inverse_transform(pred.reshape(-1, 1)))

In [None]:
# Step down: first 15 samples near 10, last 15 samples near 5.
TIME_STEPS = 30
test = np.concatenate([
    np.random.normal(10, 0.1, 15),
    np.random.normal(5, 0.1, 15),
]).reshape(-1, 1)
pred = model_pred(model, scaler_a.transform(test), TIME_STEPS)
print(scaler_a.inverse_transform(pred.reshape(-1, 1)))

### Sudden Spike 90

In [None]:
# 29 samples near 10, then a single spike near 90 as the final sample.
TIME_STEPS = 30
test = np.concatenate([
    np.random.normal(10, 0.1, 29),
    np.random.normal(90, 0.1, 1),
]).reshape(-1, 1)
pred = model_pred(model, scaler_a.transform(test), TIME_STEPS)
print(scaler_a.inverse_transform(pred.reshape(-1, 1)))

In [None]:
# A single spike near 90 as the first sample, followed by 29 samples near 10.
TIME_STEPS = 30
test = np.concatenate([
    np.random.normal(90, 0.1, 1),
    np.random.normal(10, 0.1, 29),
]).reshape(-1, 1)
pred = model_pred(model, scaler_a.transform(test), TIME_STEPS)
print(scaler_a.inverse_transform(pred.reshape(-1, 1)))

In [None]:
# Spike near 90 as the second-to-last sample: 28 samples near 10, the spike, 1 sample near 10.
TIME_STEPS = 30
test = np.concatenate([
    np.random.normal(10, 0.1, 28),
    np.random.normal(90, 0.1, 1),
    np.random.normal(10, 0.1, 1),
]).reshape(-1, 1)
pred = model_pred(model, scaler_a.transform(test), TIME_STEPS)
print(scaler_a.inverse_transform(pred.reshape(-1, 1)))

In [None]:
# Spike near 90 as the third-to-last sample: 27 samples near 10, the spike, 2 samples near 10.
TIME_STEPS = 30
test = np.concatenate([
    np.random.normal(10, 0.1, 27),
    np.random.normal(90, 0.1, 1),
    np.random.normal(10, 0.1, 2),
]).reshape(-1, 1)
pred = model_pred(model, scaler_a.transform(test), TIME_STEPS)
print(scaler_a.inverse_transform(pred.reshape(-1, 1)))

In [None]:
# Spike near 90 as the fourth-to-last sample: 26 samples near 10, the spike, 3 samples near 10.
TIME_STEPS = 30
test = np.concatenate([
    np.random.normal(10, 0.1, 26),
    np.random.normal(90, 0.1, 1),
    np.random.normal(10, 0.1, 3),
]).reshape(-1, 1)
pred = model_pred(model, scaler_a.transform(test), TIME_STEPS)
print(scaler_a.inverse_transform(pred.reshape(-1, 1)))

### Linear Trend

In [None]:
# Noise-free linear ramp from 10 to 20 over 30 samples.
TIME_STEPS = 30
test = np.linspace(10, 20, 30).reshape(-1, 1)
pred = model_pred(model, scaler_a.transform(test), TIME_STEPS)
print(scaler_a.inverse_transform(pred.reshape(-1, 1)))

In [None]:
# Noise-free linear ramp from 10 to 30 over 30 samples.
TIME_STEPS = 30
test = np.linspace(10, 30, 30).reshape(-1, 1)
pred = model_pred(model, scaler_a.transform(test), TIME_STEPS)
print(scaler_a.inverse_transform(pred.reshape(-1, 1)))

In [None]:
# Rising ramps: 30 samples from 1 up to each end value 10, 20, ..., 100.
TIME_STEPS = 30
for ramp_end in range(10, 110, 10):
  test = np.linspace(1, ramp_end, 30).reshape(-1, 1)
  pred = model_pred(model, scaler_a.transform(test), TIME_STEPS)
  print(ramp_end, ':', scaler_a.inverse_transform(pred.reshape(-1, 1)))

### Linear Trend Negative

In [None]:
# Falling ramps: 30 samples from each start value 10, 20, ..., 100 down to 1.
TIME_STEPS = 30
for ramp_start in range(10, 110, 10):
  test = np.linspace(ramp_start, 1, 30).reshape(-1, 1)
  pred = model_pred(model, scaler_a.transform(test), TIME_STEPS)
  print(ramp_start, ':', scaler_a.inverse_transform(pred.reshape(-1, 1)))

# Generated Data 2

## Test and Train Data

In [None]:
# Richer synthetic training series: thirteen 200-sample plateaus with Gaussian
# noise (standard deviation 0.1), followed by a 1000-sample noise-free ramp
# from 1 to 100.
# Seed NumPy's global generator so the series (which is also saved to CSV and
# used to train the saved model) can be regenerated exactly.
np.random.seed(42)
plateau_levels = [8, 5, 100, 6, 7, 20, 30, 40, 50, 60, 70, 80, 90]
plateaus = [np.random.normal(level, 0.1, 200) for level in plateau_levels]
ramp = np.linspace(1, 100, 1000)
train = pd.DataFrame({'cpu_usage': np.concatenate(plateaus + [ramp])})


In [None]:
# Persist the generated series (not yet scaled at this point) to Drive.
# index=False is not passed, so the DataFrame index is written as the first CSV column.
train.to_csv('/content/drive/MyDrive/capstone_project/project_progress/ML/LSTM_autoencoder/fitting_data.csv')

In [None]:
# Scatter the generated series against its row index (used here as the time axis).
plt.scatter(train.index,train.cpu_usage)
plt.ylabel('cpu usage')
plt.xlabel('time')
plt.title('Cpu Usage Training Data')

In [None]:
# Fit the scaler on the training column and overwrite that column with its
# standardized values. scaler_a is kept to scale test windows and to invert
# predictions later.
# NOTE: this overwrites train['cpu_usage'], so re-running the cell rescales
# already-scaled data.
scaler_a = StandardScaler()
train['cpu_usage'] = scaler_a.fit_transform(train[['cpu_usage']])

In [None]:
# train['cpu_usage'] was already standardized in place by the previous cell.
# Refitting `scaler_a` on it would replace the real scaler (raw-data mean/std)
# with one whose mean is ~0 and scale ~1, so every later
# `scaler_a.transform(test)` / `scaler_a.inverse_transform(pred)` call would
# stop converting between raw CPU units and the units the model was trained on.
# Fit a separate, throwaway scaler for inspection and leave `scaler_a` intact.
scaler_check = StandardScaler()
scaler_check = scaler_check.fit(train[['cpu_usage']])

In [None]:
 scaler_a.transform(train[["cpu_usage"]])

In [None]:
# Mean of the cpu_usage column as it currently stands (one-element Series).
train[['cpu_usage']].mean()

In [None]:
# Standard deviation of the cpu_usage column as it currently stands.
# NOTE: pandas uses the sample std (ddof=1) while StandardScaler.scale_ is the
# population std (ddof=0), so the two differ slightly.
train[['cpu_usage']].std()

In [None]:
# Mean learned by scaler_a for its single feature.
scaler_a.mean_[0]

In [None]:
# Scale (standard deviation) learned by scaler_a for its single feature.
scaler_a.scale_[0]

In [None]:
# Manual standardization with scaler_a's fitted statistics: (x - mean) / scale.
standar_sc=(train.cpu_usage-scaler_a.mean_[0])/scaler_a.scale_[0]

In [None]:
# Manual inverse of the standardization: x * scale + mean (displayed only, not stored).
standar_sc *   scaler_a.scale_[0]+scaler_a.mean_[0] #inverse

In [None]:
# Display the cpu_usage column for comparison with the manual round-trip result.
train.cpu_usage

## Windowed Data

In [None]:
# Window length: number of past samples the model sees before predicting the next one.
TIME_STEPS = 30
x_train, y_train = create_dataset(
    train[["cpu_usage"]],
    train.cpu_usage,
    TIME_STEPS,
)


## Fitting On First Day Data ( Train Data )

In [None]:
# Train on the windowed series and print the wall-clock training time in seconds.
model = get_model(x_train, y_train)
start = time.time()
history = model.fit(
    x_train,
    y_train,
    epochs=50,
    batch_size=64,
    validation_split=0.1,
    shuffle=False,  # keep the windows in chronological order
)
print(time.time() - start)

In [None]:
# Save the trained model to Drive (the .h5 extension indicates an HDF5 file).
model.save('/content/drive/MyDrive/capstone_project/project_progress/ML/LSTM_autoencoder/model_data_generated.h5')

## Predict on Train and Test Data

### Constant Trend

In [None]:
# Constant level: for each level 10, 20, ..., 100 build one window of 30 noisy
# samples (standard deviation 0.1) and print the prediction in original units.
TIME_STEPS = 30
for level in range(10, 110, 10):
  test = np.random.normal(level, 0.1, 30).reshape(-1, 1)
  pred = model_pred(model, scaler_a.transform(test), TIME_STEPS)
  print(level, ':', scaler_a.inverse_transform(pred.reshape(-1, 1)))

### Half 5, half 10

In [None]:
# Step up: first 15 samples near 5, last 15 samples near 10.
TIME_STEPS = 30
test = np.concatenate([
    np.random.normal(5, 0.1, 15),
    np.random.normal(10, 0.1, 15),
]).reshape(-1, 1)
pred = model_pred(model, scaler_a.transform(test), TIME_STEPS)
print(scaler_a.inverse_transform(pred.reshape(-1, 1)))

In [None]:
# Step down: first 15 samples near 10, last 15 samples near 5.
TIME_STEPS = 30
test = np.concatenate([
    np.random.normal(10, 0.1, 15),
    np.random.normal(5, 0.1, 15),
]).reshape(-1, 1)
pred = model_pred(model, scaler_a.transform(test), TIME_STEPS)
print(scaler_a.inverse_transform(pred.reshape(-1, 1)))

### Sudden Spike 90

In [None]:
# 29 samples near 10, then a single spike near 90 as the final sample.
TIME_STEPS = 30
test = np.concatenate([
    np.random.normal(10, 0.1, 29),
    np.random.normal(90, 0.1, 1),
]).reshape(-1, 1)
pred = model_pred(model, scaler_a.transform(test), TIME_STEPS)
print(scaler_a.inverse_transform(pred.reshape(-1, 1)))

In [None]:
# A single spike near 90 as the first sample, followed by 29 samples near 10.
TIME_STEPS = 30
test = np.concatenate([
    np.random.normal(90, 0.1, 1),
    np.random.normal(10, 0.1, 29),
]).reshape(-1, 1)
pred = model_pred(model, scaler_a.transform(test), TIME_STEPS)
print(scaler_a.inverse_transform(pred.reshape(-1, 1)))

In [None]:
# Spike near 90 as the second-to-last sample: 28 samples near 10, the spike, 1 sample near 10.
TIME_STEPS = 30
test = np.concatenate([
    np.random.normal(10, 0.1, 28),
    np.random.normal(90, 0.1, 1),
    np.random.normal(10, 0.1, 1),
]).reshape(-1, 1)
pred = model_pred(model, scaler_a.transform(test), TIME_STEPS)
print(scaler_a.inverse_transform(pred.reshape(-1, 1)))

In [None]:
# Spike near 90 as the third-to-last sample: 27 samples near 10, the spike, 2 samples near 10.
TIME_STEPS = 30
test = np.concatenate([
    np.random.normal(10, 0.1, 27),
    np.random.normal(90, 0.1, 1),
    np.random.normal(10, 0.1, 2),
]).reshape(-1, 1)
pred = model_pred(model, scaler_a.transform(test), TIME_STEPS)
print(scaler_a.inverse_transform(pred.reshape(-1, 1)))

In [None]:
# Spike near 90 as the fourth-to-last sample: 26 samples near 10, the spike, 3 samples near 10.
TIME_STEPS = 30
test = np.concatenate([
    np.random.normal(10, 0.1, 26),
    np.random.normal(90, 0.1, 1),
    np.random.normal(10, 0.1, 3),
]).reshape(-1, 1)
pred = model_pred(model, scaler_a.transform(test), TIME_STEPS)
print(scaler_a.inverse_transform(pred.reshape(-1, 1)))

### Linear Trend

In [None]:
# Noise-free linear ramp from 10 to 20 over 30 samples.
TIME_STEPS = 30
test = np.linspace(10, 20, 30).reshape(-1, 1)
pred = model_pred(model, scaler_a.transform(test), TIME_STEPS)
print(scaler_a.inverse_transform(pred.reshape(-1, 1)))

In [None]:
# Noise-free linear ramp from 10 to 30 over 30 samples.
TIME_STEPS = 30
test = np.linspace(10, 30, 30).reshape(-1, 1)
pred = model_pred(model, scaler_a.transform(test), TIME_STEPS)
print(scaler_a.inverse_transform(pred.reshape(-1, 1)))

In [None]:
# Rising ramps: 30 samples from 1 up to each end value 10, 20, ..., 100.
TIME_STEPS = 30
for ramp_end in range(10, 110, 10):
  test = np.linspace(1, ramp_end, 30).reshape(-1, 1)
  pred = model_pred(model, scaler_a.transform(test), TIME_STEPS)
  print(ramp_end, ':', scaler_a.inverse_transform(pred.reshape(-1, 1)))

### Linear Trend Negative

In [None]:
# Falling ramps: 30 samples from each start value 10, 20, ..., 100 down to 1.
TIME_STEPS = 30
for ramp_start in range(10, 110, 10):
  test = np.linspace(ramp_start, 1, 30).reshape(-1, 1)
  pred = model_pred(model, scaler_a.transform(test), TIME_STEPS)
  print(ramp_start, ':', scaler_a.inverse_transform(pred.reshape(-1, 1)))