In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

In [6]:
# Load the dataset
data = pd.read_csv('data/Lijn 1 laaste 12 maanden.csv')

# Feature selection
features = data[['EL04_Process_Recipe_Working.MESQuality.Density_Max',
                 'EL04_Process_Recipe_Working.MESQuality.Density_Min',
                 'EL04_Process_Recipe_Working.MESQuality.Density_Target',
                 'EL04_Dry_Feed_Rate_PID.OUT',
                 'EL04_Dry_Feed_Rate_PID.PV']]

# Target variable
target = data['EL04_Dry_Feed_Rate_PID.SP']

# Data preprocessing
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(features_scaled, target, test_size=0.2, random_state=42)

# Create and train the linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions
predictions = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, predictions)
r2 = r2_score(y_test, predictions)

print(f'Mean Squared Error (MSE): {mse}')
print(f'R-squared (R2): {r2}')

# Coefficients and intercept
coefficients = model.coef_
intercept = model.intercept_

print('\nModel Coefficients:')
for feature, coef in zip(features.columns, coefficients):
    print(f'{feature}: {coef}')

print(f'\nIntercept: {intercept}')

Mean Squared Error (MSE): 4052.8470545926934
R-squared (R2): 0.9977616720814801

Model Coefficients:
EL04_Process_Recipe_Working.MESQuality.Density_Max: -63450485785349.875
EL04_Process_Recipe_Working.MESQuality.Density_Min: 31576339934964.25
EL04_Process_Recipe_Working.MESQuality.Density_Target: 31874145850387.0
EL04_Dry_Feed_Rate_PID.OUT: 20.65087890625
EL04_Dry_Feed_Rate_PID.PV: 1342.7098388671875

Intercept: 8103.988296471154
