In [51]:
#pysr symbolic regression method
#Using from the 'dataset_3.csv'
#import all the necessary libraries
import time
import numpy as np
import pandas as pd
from pysr import *
from math import sqrt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
import warnings
warnings.filterwarnings('ignore')

#Load data and read data from csv dataset_3
data = pd.read_csv('dataset_3.csv')
X = data.iloc[:, :-1] #All the independent variables will remain except target or dependent varaiable
y = data.iloc[:, -1]  #Only target or dependent varaiable will remain 

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=100)

In [52]:
default_pysr_params = dict(
    populations=15, # Number of populations running, default: 15
    population_size=33, #Number of individuals in each population, default: 33
    ncyclesperiteration=550, # No.of total mutations to run, per 10 samples of the population, per iteration,default:550
    model_selection="best",
)
# Learn equations symbolic regression
model = PySRRegressor(
    niterations=66, #To have better score need to increase the value
    binary_operators=["+", "-", "*", "/"],
    unary_operators=["sin"],
    loss="loss(prediction, target) = (prediction - target)^2", # ^ Custom loss function (julia syntax)
    **default_pysr_params    
)

In [53]:
model

In [54]:
#Start model training run time
start_time = time.time()

In [55]:
#Train the model
model.fit(X_train, y_train)

In [56]:
#End time
end_time = time.time()

# Calculate the runtime
runtime = end_time - start_time
print("Runtime: ", runtime, "seconds")

# Make predictions on the test set
y_pred = model.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)
print("RMSE:", rmse)
print("MAE:", mae)

Runtime:  413.4299657344818 seconds
RMSE: 0.23094299410027372
MAE: 0.1830180538507527


In [57]:
#To check the R-Squared predection score of the model
model.score(X_test,y_test)

0.054554174474176875

In [58]:
#display the best model
model.equations_

Unnamed: 0,complexity,loss,score,equation,sympy_format,lambda_format
0,1,0.058893,0.0,0.014027979,0.0140279790000000,PySRFunction(X=>0.0140279790000000)
1,3,0.058805,0.000745,(0.018911121 / x1),0.018911121/x1,PySRFunction(X=>0.018911121/x1)
2,4,0.057568,0.021266,(sin(x4) * 0.057109617),0.057109617*sin(x4),PySRFunction(X=>0.057109617*sin(x4))
3,5,0.057525,0.000746,(sin(sin(x5)) * 0.065599345),0.065599345*sin(sin(x5)),PySRFunction(X=>0.065599345*sin(sin(x5)))
4,6,0.056505,0.01789,(sin(x11 * 1.7842507) * 0.06861129),0.06861129*sin(1.7842507*x11),PySRFunction(X=>0.06861129*sin(1.7842507*x11))
5,8,0.056435,0.000618,((sin(x11 * 1.7883527) - -0.116756134) * 0.069...,0.06967195*sin(1.7883527*x11) + 0.008134627530...,PySRFunction(X=>0.06967195*sin(1.7883527*x11) ...
6,9,0.0552,0.022137,((sin(x5) + sin(x11 * 1.8025544)) * 0.059440807),0.059440807*sin(1.8025544*x11) + 0.059440807*s...,PySRFunction(X=>0.059440807*sin(1.8025544*x11)...
7,10,0.05511,0.001632,((sin(sin(x5)) + sin(x11 * 1.7960347)) * 0.064...,0.064801686*sin(1.7960347*x11) + 0.064801686*s...,PySRFunction(X=>0.064801686*sin(1.7960347*x11)...
8,11,0.054418,0.012624,((sin(x5 * 1.7648342) + sin(x11 * 1.7884462)) ...,0.06573608*sin(1.7884462*x11) + 0.06573608*sin...,PySRFunction(X=>0.06573608*sin(1.7884462*x11) ...
9,13,0.054391,0.000249,(((sin(x5 * 1.7725127) + 0.06405707) + sin(x11...,0.06405707*sin(1.7938337*x11) + 0.06405707*sin...,PySRFunction(X=>0.06405707*sin(1.7938337*x11) ...


In [59]:
#Discover the mathematical equations of the model(expressions)
model.sympy()

0.061381802*sin(1.7948679*x11) + 0.061381802*sin(x4) + 0.061381802*sin(1.7844523*x5)

In [60]:
#Dolanload best model
with open(f'dataset_3_best_model', 'w') as f:
 f.write(str(model.sympy()))

In [61]:
# Generate predictions for training and testing data
from sklearn.metrics import mean_squared_error, r2_score
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)

# Evaluate the model
mse_train = mean_squared_error(y_train, y_train_pred)
mse_test = mean_squared_error(y_test, y_test_pred)
r2_train = r2_score(y_train, y_train_pred)
r2_test = r2_score(y_test, y_test_pred)

# Plot the actual vs predicted values for testing data
plt.scatter(y_test, y_test_pred)
plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], 'r', label='Ideal')
plt.xlabel('Actual')
plt.ylabel('Predicted')
plt.title('Pysr Symbolic Regression Method)
plt.legend()
plt.show()

SyntaxError: unterminated string literal (detected at line 17) (4104241751.py, line 17)