In [5]:
import pandas as pd

df = pd.read_csv('../data/processed/winequality-red-cleaned.csv')
df_fe = df.copy()


### Alcohol level (approx. actual % alcohol, density-corrected)

- The alcohol column in the original dataset is raw.
- Dividing by density gives a stabilized approximation of the actual %
- This makes the feature less sensitive to noise and improves modeling.

### Acid Balance

- Wines with high fixed acidity but low volatile acidity are typically
- perceived as "clean" and well-structured. This ratio captures that.


### Sweetness Index

- Wines with high residual sugar and low alcohol content taste sweeter.
- We divide sugar by alcohol_level (already %), giving a stable sweetness metric.

### Complexity Index

- A handcrafted feature combining acidity, sulphates (stabilizer), 
- and alcohol level, all known contributors to flavor complexity.
- Weighted to reflect typical winemaking influence.

In [6]:
df_fe["alcohol percentage"] = df_fe["alcohol"] / df_fe["density"] * 100
df_fe["acid balance"] = df_fe["fixed acidity"] / (1 + df_fe["volatile acidity"])
df_fe["sweetness index"] = df_fe["residual sugar"] / (df_fe["alcohol percentage"] + 1e-5)
df_fe["complexity index"] = (
    df_fe["fixed acidity"]*0.4 +
    df_fe["sulphates"]*0.3 +
    df_fe["alcohol percentage"]*0.3
)


In [7]:
df_fe.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality,alcohol percentage,acid balance,sweetness index,complexity index
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,942.07256,4.352941,0.002017,285.749768
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5,983.146067,4.148936,0.002645,298.26782
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5,982.948847,4.431818,0.00234,298.199654
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6,981.963928,8.75,0.001935,299.243178
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,942.07256,4.352941,0.002017,285.749768


In [8]:
df_fe.to_csv('../data/processed/winequality-red-fe.csv', index=False)