In [65]:
# PySR symbolic regression method
# Uses the data in 'dataset_1.csv'
# Import all the necessary libraries
import time
import warnings
from math import sqrt

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pysr import *
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
# Suppress every warning for the remainder of the session.
warnings.filterwarnings('ignore')

# Read dataset_1 from disk. The last column is treated as the target
# (dependent variable); every other column is kept as an independent variable.
data = pd.read_csv('dataset_1.csv')
X, y = data.iloc[:, :-1], data.iloc[:, -1]

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=100,
    test_size=0.2,
)

In [66]:
# Search settings passed through to the regressor below. The "default" values
# quoted here are the ones stated in the original notes.
default_pysr_params = {
    "populations": 15,           # number of populations running (default: 15)
    "population_size": 33,       # number of individuals in each population (default: 33)
    "ncyclesperiteration": 550,  # total mutations per 10 samples of the population, per iteration (default: 550)
    "model_selection": "best",
}

# Configure the symbolic-regression search.
model = PySRRegressor(
    binary_operators=["+", "-", "*", "/"],
    unary_operators=["sin"],
    # Custom squared-error loss function, written in Julia syntax.
    loss="loss(prediction, target) = (prediction - target)^2",
    niterations=100,  # increase this value to aim for a better score
    **default_pysr_params,
)

In [67]:
# Record the wall-clock time just before training starts. The matching end
# timestamp is taken in a later cell, so run these cells back-to-back: any
# pause between them is included in the reported runtime.
start_time = time.time()

In [None]:
# Train the model on the training split.
model.fit(X_train, y_train)

In [None]:
# Stop the clock and report how long has elapsed since start_time was recorded.
end_time = time.time()
runtime = end_time - start_time
print("Runtime: ", runtime, "seconds")

# Predict on the held-out test split and report the error metrics.
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
# RMSE is the square root of the mean squared error.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print("RMSE:", rmse)
print("MAE:", mae)

In [None]:
# Score the fitted model on the held-out test split; the cell displays the
# returned value. The original note describes this as the R-squared prediction
# score -- presumably the scikit-learn regressor default; TODO confirm.
model.score(X_test,y_test)

In [None]:
# Display all the mathematical equations discovered during the search.
model.equations_

In [None]:
# Display the model's equation as the expression returned by model.sympy().
# NOTE(review): presumably this is the equation chosen by
# model_selection="best" -- confirm against the PySR documentation.
model.sympy()

In [None]:
# Save the generated best model equation to the file 'dataset_1_best_model'.
# The expression is rendered to text *before* the file is opened: opening in
# 'w' mode truncates the file immediately, so a failure inside model.sympy()
# would otherwise leave an empty (or clobbered) file behind.
best_equation_text = str(model.sympy())
# Explicit encoding keeps the written bytes independent of the platform default.
with open('dataset_1_best_model', 'w', encoding='utf-8') as f:
    f.write(best_equation_text)

In [None]:
# Plot the actual vs. predicted values for the testing data.
# The predictions live in `y_pred` (computed in the evaluation cell); the
# previously referenced `y_test_pred` is never defined in this notebook.
fig, ax = plt.subplots()
ax.scatter(y_test, y_pred)

# Red diagonal spanning the range of the actual values: points lying on it
# are predictions that equal the actual value.
actual_min, actual_max = min(y_test), max(y_test)
ax.plot([actual_min, actual_max], [actual_min, actual_max], 'r', label='Ideal')

ax.set_xlabel('Actual')
ax.set_ylabel('Predicted')
ax.set_title('Pysr Symbolic Regression Method')
ax.legend()
plt.show()