In [4]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression
import numpy as np
import pandas as pd

# Read the dataset from disk
data = pd.read_csv("datasets/Conventional.csv")

# Pull out the date column and one series per fruit type
dates = data['Date']
envi_values, fuji_values, gala_values = (
    data[column] for column in ('Envi', 'Fuji', 'Gala')
)

# Number of consecutive observations used as features for one sample
window_size = 4

# Create a function to generate windowed data
def create_windowed_data(series, window=None):
    """Turn a 1-D sequence into sliding-window features and next-step targets.

    Sample ``k`` uses the ``window`` values at positions ``k .. k + window - 1``
    as its feature row and the value at position ``k + window`` as its target.

    Args:
        series: One-dimensional sequence of observations (pandas Series,
            list, or NumPy array).
        window: Number of consecutive observations per feature row.
            Defaults to the module-level ``window_size``.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays with shapes ``(n, window)`` and
        ``(n,)``, where ``n = max(len(series) - window, 0)``.

    Raises:
        ValueError: If ``window`` is smaller than 1.
    """
    if window is None:
        window = window_size
    if window < 1:
        raise ValueError(f"window must be at least 1, got {window}")

    # Work on a plain array so every lookup is positional: indexing a pandas
    # Series with an integer is label-based and fails (or picks the wrong
    # rows) whenever the index is not the default 0..n-1 range.
    values = np.asarray(series)
    n_samples = max(len(values) - window, 0)
    if n_samples == 0:
        # Keep X two-dimensional even when the input is too short.
        return np.empty((0, window), dtype=values.dtype), values[:0].copy()

    X = np.array([values[start:start + window] for start in range(n_samples)])
    y = values[window:].copy()
    return X, y

def window_input_output(inp: int, out: int, data: pd.DataFrame) -> pd.DataFrame:
    """Append shifted copies of the first column as ``x_*`` and ``y_*`` columns.

    Args:
        inp: Columns ``x_1`` .. ``x_{inp-1}`` are added; ``x_i`` is the first
            column shifted down by ``i`` rows.
        out: Columns ``y_0`` .. ``y_{out-1}`` are added; ``y_j`` is the first
            column shifted down by ``out + j`` rows.
        data: Source frame. It is copied, not modified.

    Returns:
        The copy with the extra columns, minus every row that contains a
        missing value.

    NOTE(review): every shift is positive, so both ``x_*`` and ``y_*`` hold
    values from earlier rows, and ``y_j`` equals ``x_{out+j}`` whenever
    ``out + j < inp``. Confirm this is the intended target definition.
    """
    frame = data.copy()

    # Feature columns: the first column lagged by 1 .. inp-1 rows
    for lag in range(1, inp):
        frame[f'x_{lag}'] = frame.iloc[:, 0].shift(lag)

    # Target columns: the first column lagged by out .. 2*out-1 rows
    for step in range(out):
        frame[f'y_{step}'] = frame.iloc[:, 0].shift(out + step)

    return frame.dropna(axis=0)

# Each fruit type goes through the same independent pipeline:
# window the series, hold out the last 20% without shuffling,
# fit a linear regression, predict on the hold-out, and score it.

# --- Envi ---
X_envi, y_envi = create_windowed_data(envi_values)
X_envi_train, X_envi_test, y_envi_train, y_envi_test = train_test_split(
    X_envi, y_envi, test_size=0.2, random_state=42, shuffle=False
)
model_envi = LinearRegression().fit(X_envi_train, y_envi_train)
pred_envi_test = model_envi.predict(X_envi_test)
mse_envi = mean_squared_error(y_envi_test, pred_envi_test)
r2_envi = r2_score(y_envi_test, pred_envi_test)

# --- Fuji ---
X_fuji, y_fuji = create_windowed_data(fuji_values)
X_fuji_train, X_fuji_test, y_fuji_train, y_fuji_test = train_test_split(
    X_fuji, y_fuji, test_size=0.2, random_state=42, shuffle=False
)
model_fuji = LinearRegression().fit(X_fuji_train, y_fuji_train)
pred_fuji_test = model_fuji.predict(X_fuji_test)
mse_fuji = mean_squared_error(y_fuji_test, pred_fuji_test)
r2_fuji = r2_score(y_fuji_test, pred_fuji_test)

# --- Gala ---
X_gala, y_gala = create_windowed_data(gala_values)
X_gala_train, X_gala_test, y_gala_train, y_gala_test = train_test_split(
    X_gala, y_gala, test_size=0.2, random_state=42, shuffle=False
)
model_gala = LinearRegression().fit(X_gala_train, y_gala_train)
pred_gala_test = model_gala.predict(X_gala_test)
mse_gala = mean_squared_error(y_gala_test, pred_gala_test)
r2_gala = r2_score(y_gala_test, pred_gala_test)

# Report: all MSE values first, then all R2 values
print("Mean Squared Error (MSE) for Envi:", mse_envi)
print("Mean Squared Error (MSE) for Fuji:", mse_fuji)
print("Mean Squared Error (MSE) for Gala:", mse_gala)

print("R-squared (R2) for Envi:", r2_envi)
print("R-squared (R2) for Fuji:", r2_fuji)
print("R-squared (R2) for Gala:", r2_gala)


Mean Squared Error (MSE) for Envi: 475420541.81234163
Mean Squared Error (MSE) for Fuji: 6732327963.077978
Mean Squared Error (MSE) for Gala: 22045650.614828892
R-squared (R2) for Envi: 0.4922543521610304
R-squared (R2) for Fuji: -0.1664789620152647
R-squared (R2) for Gala: 0.0454635516592522
