<a href="https://colab.research.google.com/github/whysomebody1/Machine-Learning/blob/main/wine_quality_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge, Lasso
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score

# Fetch the red-wine quality table (semicolon-delimited CSV hosted on the UCI archive)
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
data = pd.read_csv(url, sep=';')

# 'quality' is the regression target; every remaining column is a predictor
y = data['quality']
X = data.drop('quality', axis=1)

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scaling statistics are learned from the training rows only and then
# re-applied to the test rows, so nothing about the test set leaks into training
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


def _fit_and_score(model):
    """Fit `model` on the scaled training split and score it on the scaled test split.

    Returns a `(predictions, mse, r2)` tuple, with both metrics computed
    against `y_test`.
    """
    model.fit(X_train_scaled, y_train)
    predictions = model.predict(X_test_scaled)
    return predictions, mean_squared_error(y_test, predictions), r2_score(y_test, predictions)


# Ridge regression (alpha=1.0)
ridge = Ridge(alpha=1.0)
ridge_pred, ridge_mse, ridge_r2 = _fit_and_score(ridge)
print("Ridge Regression Mean Squared Error (MSE):", ridge_mse)
print("Ridge Regression R-squared:", ridge_r2)

# Lasso regression (alpha=1.0)
# NOTE(review): the recorded run reports a slightly negative R-squared for this
# setting, i.e. no better than predicting a constant -- alpha=1.0 is probably
# too strong a penalty for standardized features; consider tuning it.
lasso = Lasso(alpha=1.0)
lasso_pred, lasso_mse, lasso_r2 = _fit_and_score(lasso)
print("Lasso Regression Mean Squared Error (MSE):", lasso_mse)
print("Lasso Regression R-squared:", lasso_r2)

# Support Vector Regression with a linear kernel
svr = SVR(kernel='linear', C=1.0, epsilon=0.1)
svr_pred, svr_mse, svr_r2 = _fit_and_score(svr)
print("SVR Mean Squared Error (MSE):", svr_mse)
print("SVR R-squared:", svr_r2)

Ridge Regression Mean Squared Error (MSE): 0.39003800591460774
Ridge Regression R-squared: 0.4031606598177524
Lasso Regression Mean Squared Error (MSE): 0.6571600689645265
Lasso Regression R-squared: -0.005591701339940913
SVR Mean Squared Error (MSE): 0.39663419885253093
SVR R-squared: 0.3930671115453136


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Step 1: Load the dataset (semicolon-delimited CSV)
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
data = pd.read_csv(url, sep=';')

# Step 2: Explore the dataset
print(data.head())  # Display the first few rows
# DataFrame.info() writes its report itself and returns None, so it is called
# directly; wrapping it in print() would append a stray "None" line.
data.info()  # Display information about the dataset
print(data.describe())  # Statistical summary of the dataset

# Step 3: Handle missing values (if any)
print(data.isnull().sum())  # Check for missing values
# If there are missing values, decide on a strategy to handle them (e.g., imputation or removal)

# Step 4: Split the dataset into features (X) and the target variable (y)
X = data.drop('quality', axis=1)  # Features
y = data['quality']  # Target variable

# Step 5: Split the data into training and testing sets.
# The split happens BEFORE scaling so that the scaler never sees the test rows;
# fitting it on the full dataset would leak test-set statistics into training.
# The row assignment depends only on the number of rows and the seed, so
# y_train / y_test are the same as when splitting already-scaled features.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 6: Standardize the features using training-set statistics only
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so that code referring to `X_scaled` continues to work: the full
# feature matrix, transformed with the training-set mean and scale.
X_scaled = scaler.transform(X)

# Optional: Check the shape of the training and testing sets
print("Training set shape:", X_train.shape, y_train.shape)
print("Testing set shape:", X_test.shape, y_test.shape)


   fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0            7.4              0.70         0.00             1.9      0.076   
1            7.8              0.88         0.00             2.6      0.098   
2            7.8              0.76         0.04             2.3      0.092   
3           11.2              0.28         0.56             1.9      0.075   
4            7.4              0.70         0.00             1.9      0.076   

   free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
0                 11.0                  34.0   0.9978  3.51       0.56   
1                 25.0                  67.0   0.9968  3.20       0.68   
2                 15.0                  54.0   0.9970  3.26       0.65   
3                 17.0                  60.0   0.9980  3.16       0.58   
4                 11.0                  34.0   0.9978  3.51       0.56   

   alcohol  quality  
0      9.4        5  
1      9.8        5  
2      9.8        5 

In [None]:
# Hey there! Let's dive into some data analysis magic, shall we?
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn.metrics import mean_squared_error, r2_score

# Download the red-wine quality table (semicolon-delimited CSV)
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
data = pd.read_csv(url, sep=';')

# Target column is 'quality'; all other columns serve as features
y = data['quality']
X = data.drop('quality', axis=1)

# 80/20 train/test split with a fixed seed for repeatability
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize with statistics learned from the training rows only
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Each model below follows the same recipe: fit on the scaled training split,
# predict the scaled test split, then report MSE and R-squared against y_test.
# Estimator.fit() returns the estimator itself, so construction and fitting
# are chained into a single statement.

# Baseline: ordinary least squares, no regularization
lr = LinearRegression().fit(X_train_scaled, y_train)
lr_pred = lr.predict(X_test_scaled)
lr_mse, lr_r2 = mean_squared_error(y_test, lr_pred), r2_score(y_test, lr_pred)
print("Linear Regression Mean Squared Error (MSE):", lr_mse)
print("Linear Regression R-squared:", lr_r2)

# Lasso: L1 penalty
lasso = Lasso(alpha=0.1).fit(X_train_scaled, y_train)
lasso_pred = lasso.predict(X_test_scaled)
lasso_mse, lasso_r2 = mean_squared_error(y_test, lasso_pred), r2_score(y_test, lasso_pred)
print("Lasso Regression Mean Squared Error (MSE):", lasso_mse)
print("Lasso Regression R-squared:", lasso_r2)

# Ridge: L2 penalty
ridge = Ridge(alpha=0.1).fit(X_train_scaled, y_train)
ridge_pred = ridge.predict(X_test_scaled)
ridge_mse, ridge_r2 = mean_squared_error(y_test, ridge_pred), r2_score(y_test, ridge_pred)
print("Ridge Regression Mean Squared Error (MSE):", ridge_mse)
print("Ridge Regression R-squared:", ridge_r2)

# Elastic Net: an even blend of the L1 and L2 penalties (l1_ratio=0.5)
elastic_net = ElasticNet(alpha=0.1, l1_ratio=0.5).fit(X_train_scaled, y_train)
elastic_net_pred = elastic_net.predict(X_test_scaled)
elastic_net_mse, elastic_net_r2 = (
    mean_squared_error(y_test, elastic_net_pred),
    r2_score(y_test, elastic_net_pred),
)
print("Elastic Net Regression Mean Squared Error (MSE):", elastic_net_mse)
print("Elastic Net Regression R-squared:", elastic_net_r2)


Linear Regression Mean Squared Error (MSE): 0.390025143963955
Linear Regression R-squared: 0.40318034127962177
Lasso Regression Mean Squared Error (MSE): 0.4392249149065927
Lasso Regression R-squared: 0.327894450209431
Ridge Regression Mean Squared Error (MSE): 0.3900264044148274
Ridge Regression R-squared: 0.40317841252740894
Elastic Net Regression Mean Squared Error (MSE): 0.41492127773606596
Elastic Net Regression R-squared: 0.3650840741766441


In [None]:
# Alright, let's dive into some machine learning fun! 🚀
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score

# Download the red-wine quality table (semicolon-delimited CSV)
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
data = pd.read_csv(url, sep=';')

# Target column is 'quality'; all other columns serve as features
y = data['quality']
X = data.drop('quality', axis=1)

# 80/20 train/test split with a fixed seed for repeatability
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize with statistics learned from the training rows only
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Linear-kernel Support Vector Regression.
# NOTE(review): the "(Ridge)" wording in the printed labels below is kept
# verbatim so the output text does not change, but it is loose terminology:
# per the scikit-learn SVR docs the penalty is a squared L2 term whose strength
# is inversely proportional to C, while the loss is epsilon-insensitive rather
# than the squared error used by Ridge regression.
svr = SVR(kernel='linear', C=1.0, epsilon=0.1).fit(X_train_scaled, y_train)
svr_pred = svr.predict(X_test_scaled)

# Score the held-out predictions
svr_mse, svr_r2 = mean_squared_error(y_test, svr_pred), r2_score(y_test, svr_pred)
print("Support Vector Regression (SVR) with L2 regularization (Ridge) - Mean Squared Error (MSE):", svr_mse)
print("Support Vector Regression (SVR) with L2 regularization (Ridge) - R-squared:", svr_r2)


Support Vector Regression (SVR) with L2 regularization (Ridge) - Mean Squared Error (MSE): 0.39663419885253093
Support Vector Regression (SVR) with L2 regularization (Ridge) - R-squared: 0.3930671115453136


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge, Lasso
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score

# Load the dataset (semicolon-delimited CSV)
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
data = pd.read_csv(url, sep=';')

# Split features and target variable
X = data.drop('quality', axis=1)
y = data['quality']

# Split data into train and test sets BEFORE scaling.
# Fitting the scaler on the full dataset would let test-set statistics leak
# into the training features. The row assignment depends only on the number
# of rows and the seed, so y_train / y_test are the same as when splitting
# already-scaled features.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features: learn mean/scale from the training rows only, then
# apply the same transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so that code referring to `X_scaled` continues to work: the full
# feature matrix, transformed with the training-set mean and scale.
X_scaled = scaler.transform(X)

# Regularized Linear Regression (Ridge)
ridge = Ridge(alpha=1.0)
ridge.fit(X_train, y_train)
ridge_pred = ridge.predict(X_test)
ridge_mse = mean_squared_error(y_test, ridge_pred)
ridge_r2 = r2_score(y_test, ridge_pred)
print("Ridge Regression MSE:", ridge_mse)
print("Ridge Regression R-squared:", ridge_r2)

# Regularized Linear Regression (Lasso)
lasso = Lasso(alpha=1.0)
lasso.fit(X_train, y_train)
lasso_pred = lasso.predict(X_test)
lasso_mse = mean_squared_error(y_test, lasso_pred)
lasso_r2 = r2_score(y_test, lasso_pred)
print("Lasso Regression MSE:", lasso_mse)
print("Lasso Regression R-squared:", lasso_r2)

# Support Vector Regression (SVR), linear kernel.
# The "(Ridge)" in the printed labels is kept as-is; SVR uses a squared-L2
# penalty (strength inversely proportional to C) with an epsilon-insensitive
# loss, which is not the same model as Ridge regression.
svr = SVR(kernel='linear', C=1.0, epsilon=0.1)
svr.fit(X_train, y_train)
svr_pred = svr.predict(X_test)
svr_mse = mean_squared_error(y_test, svr_pred)
svr_r2 = r2_score(y_test, svr_pred)
print("SVR (Ridge) MSE:", svr_mse)
print("SVR (Ridge) R-squared:", svr_r2)


Ridge Regression MSE: 0.390036580824497
Ridge Regression R-squared: 0.4031628405022716
Lasso Regression MSE: 0.6571600689645265
Lasso Regression R-squared: -0.005591701339940913
SVR (Ridge) MSE: 0.3966236876138224
SVR (Ridge) R-squared: 0.3930831959285792


In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge

# Load the new data (example): a single wine sample described by the same
# 11 feature columns, in the same order, as the training features.
new_data = pd.DataFrame({
    'fixed acidity': [7.2],
    'volatile acidity': [0.4],
    'citric acid': [0.35],
    'residual sugar': [6.0],
    'chlorides': [0.067],
    'free sulfur dioxide': [18.0],
    'total sulfur dioxide': [41.0],
    'density': [0.994],
    'pH': [3.4],
    'sulphates': [0.65],
    'alcohol': [11.0]
})

# Standardize the new sample with the scaler that was ALREADY FITTED on the
# training data in the preceding cell (`scaler`), using transform() only.
# Fitting a fresh StandardScaler on this one-row frame would centre every
# feature on its own value, turning the sample into all zeros -- the model
# would then return its intercept no matter what values were supplied.
new_data_scaled = scaler.transform(new_data)

# Trained Ridge Regression model
ridge = Ridge(alpha=1.0)
# X_train and y_train are the (scaled) training data produced by the preceding cell
ridge.fit(X_train, y_train)

# Predict using the trained model
predictions = ridge.predict(new_data_scaled)

print("Predicted wine quality:", predictions)


Predicted wine quality: [5.62941953]
