In [2]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error, r2_score

# Fetch the Diabetes dataset and unpack the feature matrix (X) and target (y)
diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split (and therefore the printed results) reproducible between runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build the Lasso estimator (alpha is the regularization strength) and fit it
# on every feature; fit() returns the estimator itself, so the call is chained
lasso = Lasso(alpha=0.1).fit(X_train, y_train)

# The features whose coefficient is non-zero after the fit are the ones kept
coefficients = lasso.coef_
selected_features = np.flatnonzero(coefficients)  # Column indices of kept features

# Look up the names of the kept features by pairing each name with its coefficient
selected_feature_names = [
    name
    for name, weight in zip(diabetes.feature_names, coefficients)
    if weight != 0
]

# Report which features were kept
print("Selected features:", selected_feature_names)

# Restrict both splits to the kept columns
X_train_lasso, X_test_lasso = (
    split[:, selected_features] for split in (X_train, X_test)
)

# Refit on the reduced matrix. The same estimator object is reused, so the
# coefficients from the first fit are overwritten here.
lasso.fit(X_train_lasso, y_train)

# Predict on the held-out rows and score the predictions
y_pred = lasso.predict(X_test_lasso)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Report the evaluation metrics
print("\nModel Performance:")
print("Mean Squared Error:", mse)
print("R^2 Score:", r2)


Selected features: ['sex', 'bmi', 'bp', 's1', 's3', 's5', 's6']

Model Performance:
Mean Squared Error: 2798.242907511305
R^2 Score: 0.47184545850451176
