In [1]:
import pandas as pd
import torch
from stg import STG
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
# split the data into features and target variable 

reg_url = 'https://raw.githubusercontent.com/FreeDataSets/DataPool/main/tracks_150000.csv' # this is the url for the dataset
reg_df = pd.read_csv(reg_url)#.sample(100000,random_state=42) # In order to reduce the size of the dataset, we are taking a random sample of 5000 rows from the dataset
reg_df.drop(['name', 'artists','id','release_date', 'artists_id','genre',], axis=1, inplace=True, errors='ignore') # Removing Categorical features with more then 10 unique values
reg_df = reg_df.sample(800, random_state=42)
# a preview of the dataframe
# reg_df.info() 
# display(reg_df.head())
Xreg = reg_df.drop('popularity', axis=1).values # features
yreg = reg_df['popularity'] # target variable


# split the data into train and test sets
Xreg_train, Xreg_test, yreg_train, yreg_test = train_test_split(Xreg, yreg, test_size=0.2, random_state=42)


scaler_reg = StandardScaler().fit(Xreg_train)

Xreg_train_scaled = scaler_reg.transform(Xreg_train)
Xreg_test_scaled = scaler_reg.transform(Xreg_test)
scaler_y = StandardScaler().fit(yreg_train.values.reshape(-1, 1))
yreg_train_scaled = scaler_y.transform(yreg_train.values.reshape(-1, 1))
yreg_test_scaled = scaler_y.transform(yreg_test.values.reshape(-1, 1))
# Define model parameters
args_cuda = torch.cuda.is_available()
device = torch.device("cuda" if args_cuda else "cpu")

print()

In [3]:

model = STG(task_type='regression',input_dim=Xreg_train_scaled.shape[1], output_dim=1, hidden_dims=[100,50, 10], activation='tanh',
optimizer='SGD', learning_rate=0.1, batch_size=Xreg_train_scaled.shape[0], feature_selection=True, sigma=0.5, lam=0.1, random_state=1, device=device)
print("fitting model")

# Then, use Xreg_train_np and yreg_train_np as input to the fit method
model.fit(Xreg_train_scaled, yreg_train_scaled, nr_epochs=3000, valid_X=Xreg_test_scaled, valid_y=yreg_test_scaled, print_interval=100)


fitting model
Epoch: 100: loss=0.836201 valid_loss=0.817036
Epoch: 200: loss=0.842121 valid_loss=0.818696
Epoch: 300: loss=0.828817 valid_loss=0.821953
Epoch: 400: loss=0.825111 valid_loss=0.818316
Epoch: 500: loss=0.857005 valid_loss=0.821247
Epoch: 600: loss=0.792745 valid_loss=0.816683
Epoch: 700: loss=0.772601 valid_loss=0.828407
Epoch: 800: loss=0.808539 valid_loss=0.880853
Epoch: 900: loss=0.702132 valid_loss=0.831919
Epoch: 1000: loss=0.670555 valid_loss=0.860901
Epoch: 1100: loss=0.707632 valid_loss=0.881895
Epoch: 1200: loss=0.655263 valid_loss=0.932324
Epoch: 1300: loss=0.599078 valid_loss=0.942284
Epoch: 1400: loss=0.647917 valid_loss=1.047193
Epoch: 1500: loss=0.551128 valid_loss=0.971875
Epoch: 1600: loss=0.567085 valid_loss=1.005516
Epoch: 1700: loss=0.510041 valid_loss=1.034017
Epoch: 1800: loss=0.508506 valid_loss=1.087727
Epoch: 1900: loss=0.497571 valid_loss=1.104066
Epoch: 2000: loss=0.525219 valid_loss=1.091107
Epoch: 2100: loss=0.416525 valid_loss=1.176463
Epoch: 2

In [6]:
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error,mean_absolute_percentage_error

## TESTING THE MODEL
y_pred=model.predict(Xreg_test_scaled)
spot_results_dict = {
        'R2 Score': round(r2_score(yreg_test_scaled, y_pred),3),
        'RMSE': round(mean_squared_error(yreg_test_scaled, y_pred, squared=False),3),
        'MAE': round(mean_absolute_error(yreg_test_scaled, y_pred),3),
        'MAPE': round(mean_absolute_percentage_error(yreg_test_scaled, y_pred),3),
        'gates_found': model.get_gates(mode='prob').astype(str),
    }

spot_results_dict = pd.DataFrame(spot_results_dict)

In [8]:
spot_results_dict

Unnamed: 0,R2 Score,RMSE,MAE,MAPE,gates_found
0,-0.339,1.175,0.908,2.283,1.0
1,-0.339,1.175,0.908,2.283,1.0
2,-0.339,1.175,0.908,2.283,1.0
3,-0.339,1.175,0.908,2.283,1.0
4,-0.339,1.175,0.908,2.283,1.0
5,-0.339,1.175,0.908,2.283,1.0
6,-0.339,1.175,0.908,2.283,1.0
7,-0.339,1.175,0.908,2.283,1.0
8,-0.339,1.175,0.908,2.283,1.0
9,-0.339,1.175,0.908,2.283,1.0
