In [18]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Ridge
import numpy as np
import pandas as pd

# Load the dataset
music_df = pd.read_csv('music.csv')

# One-hot encode 'genre'. drop_first=True leaves out the first category level,
# and astype(int) stores the indicators as 0/1 integers.
music_dummies = pd.get_dummies(music_df['genre'], drop_first=True).astype(int)

# Swap the original 'genre' column for its dummy columns in one expression,
# building a new frame instead of mutating the existing one in place.
music_df = pd.concat([music_df.drop(columns='genre'), music_dummies], axis=1)

# Features (X) are every remaining column; the target (y) is 'instrumentalness'.
# DataFrame.values is always two-dimensional, so X needs no reshape guard.
X = music_df.drop(columns='instrumentalness').values
y = music_df['instrumentalness'].values

# Split the data before scaling to prevent data leakage
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=43)

# Learn the scaling statistics from the training split only, then apply that
# same fitted transformation to both the training and the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit(X_train).transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Candidate regularisation strengths: 20 evenly spaced values from 1e-05 to 1.
# NOTE(review): the recorded run below picked alpha = 1.0, the upper edge of
# this grid, so a wider range may be worth exploring.
params_grid = {"alpha": np.linspace(start=0.00001, stop=1, num=20)}

# Exhaustive search over the alpha grid using GridSearchCV's default
# cross-validation and scoring settings.
# NOTE(review): the scaler above was fitted on all of X_train, so every
# validation fold inside this search has already influenced the scaling;
# wrapping the scaler and the model in a Pipeline would avoid that.
ridge = Ridge()
ridge_cv = GridSearchCV(estimator=ridge, param_grid=params_grid)
ridge_cv.fit(X_train_scaled, y_train)

# Gather the held-out evaluation (score and predictions on the scaled test
# split) together with the grid-search summary (winning parameters and their
# cross-validation score), then print each as a "label value" line.
report = [
    ("Test Score:", ridge_cv.score(X_test_scaled, y_test)),
    ("Predictions:", ridge_cv.predict(X_test_scaled)),
    ("Best Parameters:", ridge_cv.best_params_),
    ("Best Cross-Validation Score:", ridge_cv.best_score_),
]
for label, value in report:
    print(label, value)


Test Score: 0.16858192854341358
Predictions: [0.03853593 0.00349638 0.11759692 ... 0.20032171 0.00480765 0.2608903 ]
Best Parameters: {'alpha': 1.0}
Best Cross-Validation Score: 0.14165822178691212


In [26]:
from sklearn.pipeline import Pipeline
from sklearn.linear_model import Ridge

# Relies on X_train, X_test, y_train and y_test (plus np, StandardScaler and
# GridSearchCV) already existing in the kernel; none of them is defined in
# this cell.

# Chain scaling and ridge regression into one estimator so the grid search
# fits the scaler together with the model on each cross-validation split.
steps = [("scaler", StandardScaler()), ("ridge", Ridge())]
pipeline = Pipeline(steps=steps)

# "ridge__alpha" addresses the alpha parameter of the step named "ridge".
parameters = {"ridge__alpha": np.linspace(start=0.001, stop=1.0, num=20)}
cv = GridSearchCV(estimator=pipeline, param_grid=parameters)
cv.fit(X_train, y_train)

# Score of the tuned pipeline on the held-out test split
print(cv.score(X_test, y_test))


0.16858192854341358
