In [25]:
import pandas as pd

# Read the cleaned red-wine CSV from the processed-data folder.
df = pd.read_csv(filepath_or_buffer='../data/processed/winequality-red-cleaned.csv')

# Work on an independent (deep) copy so `df` stays exactly as loaded while
# engineered columns are added to `df_fe`.
df_fe = df.copy(deep=True)


### Alcohol percentage (approx. actual % alcohol, density-corrected)

- The `alcohol` column in the original dataset is used as the raw input.
- Dividing it by `density` gives a stabilized approximation of the actual % alcohol; the result is stored as `alcohol percentage`.
- This is intended to make the feature less sensitive to noise and to improve modeling.

### Acid Balance

- Wines with high fixed acidity but low volatile acidity are typically perceived as "clean" and well-structured. The ratio `fixed acidity / volatile acidity` (stored as `acid_balance`) captures that.


### Sweetness Index

- Wines with high residual sugar and low alcohol content taste sweeter.
- We divide residual sugar by `alcohol percentage` (the density-corrected alcohol feature, already a percentage), giving a stable sweetness metric.

### Complexity Index

- A handcrafted feature combining fixed acidity, sulphates (stabilizer), and alcohol percentage, all known contributors to flavor complexity.
- Weighted to reflect typical winemaking influence.

In [26]:
# Alcohol divided by density.
df_fe["alcohol percentage"] = df_fe["alcohol"].div(df_fe["density"])

# Fixed-to-volatile acidity ratio; the small 1e-5 offset guards against
# division by zero when volatile acidity is 0.
df_fe["acid_balance"] = df_fe["fixed acidity"].div(
    df_fe["volatile acidity"].add(1e-5)
)

# Residual sugar relative to the alcohol percentage computed above, with the
# same 1e-5 offset on the denominator.
df_fe["sweetness index"] = df_fe["residual sugar"].div(
    df_fe["alcohol percentage"].add(1e-5)
)

# Weighted sum of the raw columns:
# 0.4 * fixed acidity + 0.3 * sulphates + 0.3 * alcohol percentage.
df_fe["complexity index"] = (
    df_fe["fixed acidity"].mul(0.4)
    .add(df_fe["sulphates"].mul(0.3))
    .add(df_fe["alcohol percentage"].mul(0.3))
)


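### Complexity index weights

- 40% fixed acidity -> structural backbone
- 30% sulphates -> stabilization & flavor intensity
- 30% alcohol percentage -> body & aromatic profile

Note: the weights are applied to the raw, unscaled columns. Because fixed acidity (mean ≈ 8.3) is roughly an order of magnitude larger than sulphates (mean ≈ 0.66), the actual contribution of each term to the index does not match these nominal percentages.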

In [27]:
# A notebook cell only renders its final expression, so the preview is sent
# through display() explicitly; as a bare statement the head() result would be
# computed and then silently discarded.
display(df_fe.head())

# Per-column summary statistics, transposed so each column of df_fe is one row.
df_fe.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
fixed acidity,1518.0,8.33531,1.738518,4.7,7.1,7.9,9.2,15.9
volatile acidity,1518.0,0.520909,0.171017,0.12,0.39,0.52,0.63,1.33
citric acid,1518.0,0.27359,0.192868,0.0,0.1,0.26,0.4275,0.79
residual sugar,1518.0,2.532279,1.397449,0.9,1.9,2.2,2.6,15.5
chlorides,1518.0,0.087349,0.045761,0.012,0.07,0.079,0.09,0.611
free sulfur dioxide,1518.0,16.0639,10.477423,1.0,8.0,14.0,22.0,72.0
total sulfur dioxide,1518.0,47.121212,33.101582,6.0,23.0,38.0,63.0,289.0
density,1518.0,0.996767,0.001883,0.99007,0.9956,0.996765,0.99786,1.00369
pH,1518.0,3.308603,0.152161,2.86,3.21,3.31,3.4,4.01
sulphates,1518.0,0.659585,0.166541,0.37,0.55,0.62,0.73,1.98


In [28]:
# Write the frame with the engineered columns to the processed-data folder;
# index=False keeps the row index out of the CSV.
df_fe.to_csv(
    path_or_buf='../data/processed/winequality-red-fe.csv',
    index=False,
)