The aim of this task is to determine whether the punch data can be used to predict the coil data, and vice versa.

In [1]:
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from npfeintool import FeatEx
from npfeintool import CON


In [2]:

class RegressionEvaluation:
    '''
    Hold-out evaluation helpers for quick regression baselines.

    Both public helpers run the same pipeline and differ only in the estimator:
    split into train/test, standardise X and y with statistics learned on the
    training split, fit, predict on the test split, then print and return the
    mean absolute error, mean squared error and R^2 score.

    All three metrics are computed on the *standardised* target, so MAE and MSE
    are expressed in training-set standard deviations, not in the target's
    original units.
    '''

    @staticmethod
    def _evaluate(regressor, X, y, test_size=0.25, random_state=0):
        '''
        Shared split / scale / fit / score pipeline used by the public helpers.

        Parameters
        ----------
        regressor : unfitted estimator exposing ``fit(X, y)`` and ``predict(X)``.
        X : 2-D array-like of features.
        y : array-like of labels; a 1-D input is treated as a single column.
        test_size, random_state : forwarded to ``train_test_split``.

        Returns
        -------
        dict with keys ``'mae'``, ``'mse'`` and ``'r2'``.
        '''
        # Imports stay function-local, as in the original cell, so this class
        # cell does not depend on any other cell importing scikit-learn.
        from sklearn.model_selection import train_test_split
        from sklearn.preprocessing import StandardScaler
        from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

        # StandardScaler needs a 2-D target; accept a plain 1-D vector as well.
        y = np.asarray(y)
        if y.ndim == 1:
            y = y.reshape(-1, 1)

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=random_state)

        # Separate scalers for features and target: refitting one scaler on y
        # would throw away the feature scaling parameters.
        feature_scaler = StandardScaler()
        X_train = feature_scaler.fit_transform(X_train)
        X_test = feature_scaler.transform(X_test)

        target_scaler = StandardScaler()
        y_train = target_scaler.fit_transform(y_train)
        y_test = target_scaler.transform(y_test)

        # A single-output target is handed to the estimator as a 1-D vector.
        # SVR expects that shape and otherwise emits a DataConversionWarning;
        # the fitted model and the metrics are unchanged.
        if y_train.shape[1] == 1:
            y_train = y_train.ravel()
            y_test = y_test.ravel()

        regressor.fit(X_train, y_train)
        y_pred = regressor.predict(X_test)

        mae = mean_absolute_error(y_test, y_pred)  # the best value is 0.0
        mse = mean_squared_error(y_test, y_pred)   # the best value is 0.0
        r2 = r2_score(y_test, y_pred)              # best possible score is 1.0

        # NOTE: "root squared score" in the message below is the R^2 score
        # (coefficient of determination). The wording is kept unchanged so the
        # output recorded in the notebook still matches.
        print('The mean absolute error for this prediction is: {a}.  \nThe mean squared error for this prediction is: {b}.\nThe root squared score for this prediction is: {c}'.format(a=mae, b=mse,c=r2))

        return {'mae': mae, 'mse': mse, 'r2': r2}

    @staticmethod
    def KNN_Regressor_Evaluation(X, y, n_neighbors=5, test_size=0.25, random_state=0):
        '''
        Fit a k-nearest-neighbours regressor on features X and labels y and
        evaluate it on a held-out test split.

        The regressor uses the Euclidean distance (Minkowski metric with p=2).
        The metrics (mean_absolute_error, mean_squared_error, r2_score) are
        printed and also returned as a dict with keys 'mae', 'mse' and 'r2'.
        '''
        from sklearn.neighbors import KNeighborsRegressor

        regressor = KNeighborsRegressor(
            n_neighbors=n_neighbors, metric='minkowski', p=2, algorithm='auto')
        return RegressionEvaluation._evaluate(
            regressor, X, y, test_size=test_size, random_state=random_state)

    @staticmethod
    def SVR_Regressor_Evaluation(X, y, kernel='rbf', test_size=0.25, random_state=0):
        '''
        Fit a support vector regressor on features X and labels y and evaluate
        it on a held-out test split.

        SVR supports a single output only, so y must be 1-D or have exactly one
        column. The metrics (mean_absolute_error, mean_squared_error, r2_score)
        are printed and also returned as a dict with keys 'mae', 'mse' and 'r2'.
        '''
        from sklearn.svm import SVR

        regressor = SVR(kernel=kernel)
        return RegressionEvaluation._evaluate(
            regressor, X, y, test_size=test_size, random_state=random_state)
        



# Case 1: Using the punch data to predict the coil data

In [3]:
# Load the dico feature table from disk.
dico = pd.read_json("dico_features.json")

# Remove the 'way', 'index' and 'segment' columns, then keep the first 100 of the
# remaining columns as predictors (presumably the punch features, since Case 1
# predicts coil data from punch data -- confirm against the column layout).
X1_with_col_names = dico.drop(columns=['way', 'index', 'segment'])
X2_with_col_names = X1_with_col_names.iloc[:, :100]

# Convert to a NumPy array with missing values replaced by 0.
X = pd.DataFrame(X2_with_col_names.values).fillna(0).values

Label: entryCoiler_force_avg

In [4]:

# Target column: entryCoiler_force_avg. Missing values become 0 and the result is
# kept as an (n, 1) column because the evaluation helpers pass y to StandardScaler.
y = np.array(dico[['entryCoiler_force_avg']].fillna(0).values).reshape(-1, 1)

# Fit and score both regressors; each call prints its own metrics.
KNN = RegressionEvaluation.KNN_Regressor_Evaluation(X, y)
SVR = RegressionEvaluation.SVR_Regressor_Evaluation(X, y)


The mean absolute error for this prediction is: 0.04778655421453969.  
The mean squared error for this prediction is: 0.024945281930116414.
The root squared score for this prediction is: 0.9728074157056137


  y = column_or_1d(y, warn=True)


The mean absolute error for this prediction is: 0.13783485423015557.  
The mean squared error for this prediction is: 0.06482619831637078.
The root squared score for this prediction is: 0.9293336564749626


Label: exitCoiler_force_min

In [5]:
# Target column: exitCoiler_force_min. Missing values become 0 and the result is
# kept as an (n, 1) column because the evaluation helpers pass y to StandardScaler.
y = np.array(dico[['exitCoiler_force_min']].fillna(0).values).reshape(-1, 1)

# Fit and score both regressors; each call prints its own metrics.
KNN = RegressionEvaluation.KNN_Regressor_Evaluation(X, y)
SVR = RegressionEvaluation.SVR_Regressor_Evaluation(X, y)

The mean absolute error for this prediction is: 0.047363799118438146.  
The mean squared error for this prediction is: 0.018383016551502342.
The root squared score for this prediction is: 0.9806949015605579


  y = column_or_1d(y, warn=True)


The mean absolute error for this prediction is: 0.13210697727235712.  
The mean squared error for this prediction is: 0.05014939753013668.
The root squared score for this prediction is: 0.9473351365764352


Label: thickness_ibf_avg

In [6]:

# Target column: thickness_ibf_avg. Missing values become 0 and the result is
# kept as an (n, 1) column because the evaluation helpers pass y to StandardScaler.
y = np.array(dico[['thickness_ibf_avg']].fillna(0).values).reshape(-1, 1)

# Fit and score the KNN regressor; the call prints its own metrics.
KNN = RegressionEvaluation.KNN_Regressor_Evaluation(X, y)


The mean absolute error for this prediction is: 0.01173638968346538.  
The mean squared error for this prediction is: 0.013387308312087239.
The root squared score for this prediction is: 0.9856837276512755


In [7]:
# Score the SVR regressor on the X and y currently in the namespace (the
# thickness_ibf_avg target prepared in cell In [6]); the call prints MAE, MSE and R^2.
SVR = RegressionEvaluation.SVR_Regressor_Evaluation(X,y)


  y = column_or_1d(y, warn=True)


The mean absolute error for this prediction is: 0.09306433108230426.  
The mean squared error for this prediction is: 0.03806944858365841.
The root squared score for this prediction is: 0.9592888591654134


Label: thickness_ibf_max

In [8]:


# Target column: thickness_ibf_max. Missing values become 0 and the result is
# kept as an (n, 1) column because the evaluation helpers pass y to StandardScaler.
y = np.array(dico[['thickness_ibf_max']].fillna(0).values).reshape(-1, 1)

# Fit and score the KNN regressor; the call prints its own metrics.
KNN = RegressionEvaluation.KNN_Regressor_Evaluation(X, y)


The mean absolute error for this prediction is: 0.012195318957642672.  
The mean squared error for this prediction is: 0.01340149953689576.
The root squared score for this prediction is: 0.9856679747385146


In [9]:
# Score the SVR regressor on the X and y currently in the namespace (the
# thickness_ibf_max target prepared in cell In [8]); the call prints MAE, MSE and R^2.
SVR = RegressionEvaluation.SVR_Regressor_Evaluation(X,y)


  y = column_or_1d(y, warn=True)


The mean absolute error for this prediction is: 0.09322741791616877.  
The mean squared error for this prediction is: 0.03809918707175283.
The root squared score for this prediction is: 0.9592554168993467


# Case 2: Using the coil data to predict the punch data

In [10]:

# Reload the dico feature table so this case starts from the unmodified data.
dico = pd.read_json("dico_features.json")

# Predictors for Case 2: the columns at positions 103..115 of the full table
# (presumably the coil features, since Case 2 predicts punch data from coil
# data -- confirm against the column layout).
X1_with_col_names = dico.iloc[:, 103:116]

# Convert to a NumPy array with missing values replaced by 0.
X = pd.DataFrame(X1_with_col_names.values).fillna(0).values


Label: max_Stempel_2

In [11]:
# Target column: max_Stempel_2. Missing values become 0 and the result is kept
# as an (n, 1) column because the evaluation helpers pass y to StandardScaler.
y = np.array(dico[['max_Stempel_2']].fillna(0).values).reshape(-1, 1)

# Fit and score the KNN regressor; the call prints its own metrics.
KNN = RegressionEvaluation.KNN_Regressor_Evaluation(X, y)


The mean absolute error for this prediction is: 0.5934458450420357.  
The mean squared error for this prediction is: 0.6468758081413227.
The root squared score for this prediction is: 0.27963419997717287


In [12]:
# Score the SVR regressor on the X and y currently in the namespace (the
# max_Stempel_2 target prepared in cell In [11]); the call prints MAE, MSE and R^2.
SVR = RegressionEvaluation.SVR_Regressor_Evaluation(X,y)

  y = column_or_1d(y, warn=True)


The mean absolute error for this prediction is: 0.499499862221531.  
The mean squared error for this prediction is: 0.4885941766648474.
The root squared score for this prediction is: 0.4558978237708754


Label: median_Niederhalter_4

In [13]:


# Target column: median_Niederhalter_4. Missing values become 0 and the result is
# kept as an (n, 1) column because the evaluation helpers pass y to StandardScaler.
y = np.array(dico[['median_Niederhalter_4']].fillna(0).values).reshape(-1, 1)

# Fit and score the KNN regressor; the call prints its own metrics.
KNN = RegressionEvaluation.KNN_Regressor_Evaluation(X, y)


The mean absolute error for this prediction is: 0.7856752858474794.  
The mean squared error for this prediction is: 0.9385395477117819.
The root squared score for this prediction is: -0.06286573498886705


In [14]:

# Score the SVR regressor on the X and y currently in the namespace (the
# median_Niederhalter_4 target prepared in cell In [13]); the call prints MAE, MSE and R^2.
SVR = RegressionEvaluation.SVR_Regressor_Evaluation(X,y)

  y = column_or_1d(y, warn=True)


The mean absolute error for this prediction is: 0.7377718538765446.  
The mean squared error for this prediction is: 0.8825347295699548.
The root squared score for this prediction is: 0.0005579133191428998


Label: min_Position_Ma

In [15]:


# Target column: min_Position_Ma. Missing values become 0 and the result is kept
# as an (n, 1) column because the evaluation helpers pass y to StandardScaler.
y = np.array(dico[['min_Position_Ma']].fillna(0).values).reshape(-1, 1)

# Fit and score the KNN regressor; the call prints its own metrics.
KNN = RegressionEvaluation.KNN_Regressor_Evaluation(X, y)

The mean absolute error for this prediction is: 0.16473100060439272.  
The mean squared error for this prediction is: 0.977533618834945.
The root squared score for this prediction is: -0.028407945091925235


In [16]:
# Score the SVR regressor on the X and y currently in the namespace (the
# min_Position_Ma target prepared in cell In [15]); the call prints MAE, MSE and R^2.
SVR = RegressionEvaluation.SVR_Regressor_Evaluation(X,y)


The mean absolute error for this prediction is: 0.2578611014002888.  
The mean squared error for this prediction is: 0.9543734016248149.
The root squared score for this prediction is: -0.0040423878056810025


  y = column_or_1d(y, warn=True)


Label: mean_Position_NH

In [17]:


# Target column: mean_Position_NH. Missing values become 0 and the result is kept
# as an (n, 1) column because the evaluation helpers pass y to StandardScaler.
y = np.array(dico[['mean_Position_NH']].fillna(0).values).reshape(-1, 1)

# Fit and score the KNN regressor; the call prints its own metrics.
KNN = RegressionEvaluation.KNN_Regressor_Evaluation(X, y)

The mean absolute error for this prediction is: 0.45598747070691.  
The mean squared error for this prediction is: 1.0887678246394046.
The root squared score for this prediction is: -0.11737600339498422


In [18]:
# Score the SVR regressor on the X and y currently in the namespace (the
# mean_Position_NH target prepared in cell In [17]); the call prints MAE, MSE and R^2.
SVR = RegressionEvaluation.SVR_Regressor_Evaluation(X,y)

  y = column_or_1d(y, warn=True)


The mean absolute error for this prediction is: 0.4156566448186025.  
The mean squared error for this prediction is: 1.1023292068139308.
The root squared score for this prediction is: -0.13129372090257374
