In [None]:
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import make_regression

In [None]:
# Notebook-wide configuration for the synthetic data set.
SEED = 0                # fixed seed so Restart & Run All regenerates the same data
np.random.seed(SEED)    # seeds the global generator used by the np.random.* calls
rows = 500000           # number of rows to generate
# Bounds passed to np.random.randint for the per-row time step in minutes.
# The upper bound is exclusive, so (10, 11) makes every step exactly 10.
t1 = 10
t2 = 11

In [None]:
# Per-row time step in minutes. randint excludes the upper bound, so every
# step falls in [t1, t2).
time = np.random.randint(low=t1, high=t2, size=rows)

In [None]:
# Start from an empty frame; its columns are added one at a time in later cells.
data_df = pd.DataFrame()

In [None]:
# The running total of the time steps becomes each row's elapsed minutes.
data_df['time_in_mins'] = time.cumsum()

In [None]:
# Preview the first 30 rows of the time column.
data_df.head(30)

In [None]:
# Label pattern: five 0s followed by five 1s, repeated cyclically down the frame.
data = np.arange(0, 2, 1)     # the two label codes: [0, 1]
data1 = np.repeat(data, 5)    # one period: [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
# np.resize repeats the period until exactly `rows` entries are filled, so the
# labels keep matching the frame length when `rows` changes (the repeat count
# used to be hard-coded as 50000).
data2 = np.resize(data1, rows)

In [None]:
# Attach the 0/1 label codes as the product_quality column.
data_df['product_quality'] = data2

In [None]:
# Preview the first 20 rows, now including product_quality.
data_df.head(20)

In [None]:
# Turn the 0/1 codes into readable labels. The result is assigned back to the
# column: calling replace(..., inplace=True) on data_df['product_quality'] is
# chained assignment, which pandas warns about and which leaves data_df
# unchanged once Copy-on-Write is active.
data_df['product_quality'] = data_df['product_quality'].replace(
    {0: "Standard", 1: "Warning"}
)

In [None]:
# Summary statistics for the frame built so far.
data_df.describe()

In [None]:
# Split the rows by quality label. The explicit copies make each subset an
# independent frame, so the sensor columns added to them later do not raise
# SettingWithCopyWarning. The empty-DataFrame placeholders that used to precede
# these lines were overwritten immediately and have been dropped.
warning = data_df[data_df.product_quality == "Warning"].copy()
standard = data_df[data_df.product_quality == "Standard"].copy()


In [None]:
# Sample counts follow the subset sizes instead of a hard-coded 250000, so the
# arrays always fit the frames they are assigned to.
a = len(standard)   # number of rows labelled "Standard"
b = len(warning)    # number of rows labelled "Warning"

# Synthetic readings: one uniform draw over [low, high) per row, with separate
# ranges for the Standard (*_std) and Warning (*_war) rows. The draw order is
# kept as it was so a seeded run yields the same values.
current_std = np.random.uniform(24, 30, size=a)
voltage_std = np.random.uniform(220, 245, size=a)
freq_std = np.random.uniform(500, 2000, size=a)
rot_std = np.random.uniform(600, 900, size=a)
temp_std = np.random.uniform(21, 29, size=a)
hum_std = np.random.uniform(30, 55, size=a)
current_war = np.random.uniform(28, 34, size=b)
voltage_war = np.random.uniform(200, 240, size=b)
freq_war = np.random.uniform(1000, 3000, size=b)
rot_war = np.random.uniform(500, 700, size=b)
temp_war = np.random.uniform(23, 40, size=b)
hum_war = np.random.uniform(30, 55, size=b)
# Availability values for the three availability_* columns.
avb04_std = np.random.uniform(75, 99, size=a)
avb04_war = np.random.uniform(40, 75, size=b)
avb26_std = np.random.uniform(75, 99, size=a)
avb26_war = np.random.uniform(0, 40, size=b)
avb48_std = np.random.uniform(75, 99, size=a)
avb48_war = np.random.uniform(40, 75, size=b)


In [None]:
# Add the readings to each subset, truncated to whole numbers. assign() returns
# a new frame rather than writing into a filtered slice of data_df, and takes
# the int64 arrays directly instead of first converting each one to a Python
# list. Column order and dtype (int64) match the list-based version.
standard = standard.assign(
    current=current_std.astype(np.int64),
    voltage=voltage_std.astype(np.int64),
    frequency=freq_std.astype(np.int64),
    rot_speed=rot_std.astype(np.int64),
    temperature=temp_std.astype(np.int64),
    humidity=hum_std.astype(np.int64),
    availability_0_4=avb04_std.astype(np.int64),
    availability_2_6=avb26_std.astype(np.int64),
    availability_4_8=avb48_std.astype(np.int64),
)

warning = warning.assign(
    current=current_war.astype(np.int64),
    voltage=voltage_war.astype(np.int64),
    frequency=freq_war.astype(np.int64),
    rot_speed=rot_war.astype(np.int64),
    temperature=temp_war.astype(np.int64),
    humidity=hum_war.astype(np.int64),
    availability_0_4=avb04_war.astype(np.int64),
    availability_2_6=avb26_war.astype(np.int64),
    availability_4_8=avb48_war.astype(np.int64),
)

In [None]:
# Stack the two subsets back into one frame (Warning rows first, then Standard).
data_df = pd.concat([warning,standard]) 

In [None]:
# Restore chronological order after the concat grouped rows by quality label.
data_df = data_df.sort_values('time_in_mins')

In [None]:
# Preview the first 10 rows of the assembled data set.
data_df.head(10)

In [None]:
# Sensor readings used as model inputs.
feature_cols = ['current', 'voltage', 'frequency', 'rot_speed', 'temperature', 'humidity']
X = data_df[feature_cols]

# One regression target per availability column.
y1 = data_df['availability_0_4']
y2 = data_df['availability_2_6']
y3 = data_df['availability_4_8']

# The preview goes last: only a cell's final expression is rendered, so the
# mid-cell X.head() never displayed anything.
X.head()

In [None]:
# Fit a linear model for availability_0_4 on 75% of the rows; the fixed
# random_state makes the split repeatable.
X_train, X_test, y1_train, y1_test = train_test_split(
    X, y1, test_size=0.25, random_state=0
)
reg = LinearRegression().fit(X_train, y1_train)
# Report R^2 on the held-out rows. Scoring on all of X mostly re-scored the
# training data and left the test split unused.
print(reg.score(X_test, y1_test))
# Predictions are still made for every row so they can be stored as a column.
predictions = reg.predict(X)
data_df['availability_0_4_predicted'] = predictions

In [None]:
# New target: the mean of the generated value and the linear model's prediction.
data_df['availability_0_4_new'] = (
    data_df['availability_0_4']
    .add(data_df['availability_0_4_predicted'])
    .div(2)
)
y = data_df['availability_0_4_new']

In [None]:
# Random forest on the blended availability_0_4 target; the fixed random_state
# makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0
)
# n_jobs=-1 builds the 100 trees on all available cores; the fitted model is
# still determined by random_state.
regressor = RandomForestRegressor(
    n_estimators=100, max_depth=15, random_state=0, n_jobs=-1
)
regressor.fit(X_train, y_train)
# Report R^2 on the held-out rows rather than on data the forest was trained on.
print(regressor.score(X_test, y_test))

In [None]:
# Fit a linear model for availability_2_6 on 75% of the rows; the fixed
# random_state makes the split repeatable.
X_train, X_test, y2_train, y2_test = train_test_split(
    X, y2, test_size=0.25, random_state=0
)
reg = LinearRegression().fit(X_train, y2_train)
# Report R^2 on the held-out rows. Scoring on all of X mostly re-scored the
# training data and left the test split unused.
print(reg.score(X_test, y2_test))
# Predictions are still made for every row so they can be stored as a column.
predictions = reg.predict(X)
data_df['availability_2_6_predicted'] = predictions

In [None]:
# New target: the mean of the generated value and the linear model's prediction.
data_df['availability_2_6_new'] = (
    data_df['availability_2_6']
    .add(data_df['availability_2_6_predicted'])
    .div(2)
)
y = data_df['availability_2_6_new']

In [None]:
# Random forest on the blended availability_2_6 target; the fixed random_state
# makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0
)
# n_jobs=-1 builds the 100 trees on all available cores; the fitted model is
# still determined by random_state.
regressor = RandomForestRegressor(
    n_estimators=100, max_depth=15, random_state=0, n_jobs=-1
)
regressor.fit(X_train, y_train)
# Report R^2 on the held-out rows rather than on data the forest was trained on.
print(regressor.score(X_test, y_test))

In [None]:
# Fit a linear model for availability_4_8 on 75% of the rows; the fixed
# random_state makes the split repeatable.
X_train, X_test, y3_train, y3_test = train_test_split(
    X, y3, test_size=0.25, random_state=0
)
reg = LinearRegression().fit(X_train, y3_train)
# Report R^2 on the held-out rows. Scoring on all of X mostly re-scored the
# training data and left the test split unused.
print(reg.score(X_test, y3_test))
# Predictions are still made for every row so they can be stored as a column.
predictions = reg.predict(X)
data_df['availability_4_8_predicted'] = predictions

In [None]:
# New target: the mean of the generated value and the linear model's prediction.
data_df['availability_4_8_new'] = (
    data_df['availability_4_8']
    .add(data_df['availability_4_8_predicted'])
    .div(2)
)
y = data_df['availability_4_8_new']

In [None]:
# Random forest on the blended availability_4_8 target; the fixed random_state
# makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0
)
# n_jobs=-1 builds the 100 trees on all available cores; the fitted model is
# still determined by random_state.
regressor = RandomForestRegressor(
    n_estimators=100, max_depth=15, random_state=0, n_jobs=-1
)
regressor.fit(X_train, y_train)
# Report R^2 on the held-out rows rather than on data the forest was trained on.
print(regressor.score(X_test, y_test))