In [134]:
import numpy as np
import pandas as pd 
from hyperopt import STATUS_OK,Trials,fmin,hp,tpe
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import mlflow
from mlflow.models import infer_signature
import keras

In [135]:
# Load the semicolon-delimited wine-quality CSV into a DataFrame and display it.
data = pd.read_csv(
    "winequality-white.csv",
    sep=";",
)
data

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.00100,3.00,0.45,8.8,6
1,6.3,0.30,0.34,1.6,0.049,14.0,132.0,0.99400,3.30,0.49,9.5,6
2,8.1,0.28,0.40,6.9,0.050,30.0,97.0,0.99510,3.26,0.44,10.1,6
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.99560,3.19,0.40,9.9,6
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.99560,3.19,0.40,9.9,6
...,...,...,...,...,...,...,...,...,...,...,...,...
4893,6.2,0.21,0.29,1.6,0.039,24.0,92.0,0.99114,3.27,0.50,11.2,6
4894,6.6,0.32,0.36,8.0,0.047,57.0,168.0,0.99490,3.15,0.46,9.6,5
4895,6.5,0.24,0.19,1.2,0.041,30.0,111.0,0.99254,2.99,0.46,9.4,6
4896,5.5,0.29,0.30,1.1,0.022,20.0,110.0,0.98869,3.34,0.38,12.8,7


In [136]:
# Hold out 20% of the rows as the test set; the fixed seed keeps the split repeatable.
train, test = train_test_split(
    data,
    test_size=0.20,
    random_state=42,
)

In [137]:
# Feature matrix for the training split: every column except the "quality" target.
train_x = train.drop(columns=["quality"]).to_numpy()

In [138]:
# Target vector for the training split (the "quality" column).
train_y = train["quality"].to_numpy()

In [139]:
# Display the training targets as a quick sanity check.
train_y    

array([6, 5, 6, ..., 6, 6, 8], dtype=int64)

In [140]:
# Feature matrix for the held-out test split: every column except the "quality" target.
test_x = test.drop(columns=["quality"]).to_numpy()

In [141]:
# Target vector for the held-out test split (the "quality" column).
test_y = test["quality"].to_numpy()

In [142]:
# Display the held-out test targets as a quick sanity check.
test_y

array([7, 8, 8, 5, 7, 6, 5, 4, 6, 5, 7, 5, 7, 6, 5, 5, 8, 5, 7, 6, 6, 7,
       5, 5, 8, 5, 5, 6, 7, 5, 5, 5, 6, 6, 5, 7, 7, 6, 5, 6, 6, 6, 7, 6,
       4, 6, 6, 6, 5, 5, 5, 5, 6, 6, 6, 7, 6, 5, 5, 7, 6, 6, 6, 7, 6, 7,
       6, 6, 6, 5, 5, 6, 4, 5, 5, 6, 6, 7, 7, 7, 5, 6, 6, 7, 7, 6, 5, 7,
       5, 7, 7, 5, 7, 6, 8, 7, 6, 6, 5, 6, 6, 5, 7, 6, 6, 6, 7, 7, 5, 6,
       6, 7, 6, 5, 5, 7, 7, 6, 5, 8, 5, 6, 6, 6, 7, 6, 7, 6, 6, 5, 5, 6,
       6, 5, 5, 6, 6, 6, 6, 5, 8, 6, 6, 6, 6, 5, 5, 6, 5, 6, 6, 6, 6, 5,
       7, 5, 6, 6, 6, 5, 6, 5, 6, 6, 7, 6, 5, 5, 5, 6, 7, 6, 6, 5, 6, 6,
       6, 6, 6, 5, 6, 5, 7, 6, 5, 7, 7, 6, 6, 5, 6, 5, 7, 6, 4, 5, 7, 6,
       6, 6, 7, 4, 6, 6, 6, 6, 7, 7, 6, 6, 6, 6, 6, 7, 5, 6, 7, 3, 6, 6,
       5, 6, 5, 6, 6, 5, 7, 7, 7, 5, 6, 5, 6, 6, 4, 7, 6, 4, 6, 7, 6, 4,
       7, 6, 7, 5, 4, 5, 5, 6, 6, 4, 5, 5, 7, 6, 6, 5, 6, 5, 5, 6, 5, 6,
       7, 6, 6, 6, 7, 5, 5, 6, 7, 8, 6, 6, 5, 6, 7, 5, 6, 5, 6, 4, 6, 7,
       7, 6, 5, 6, 7, 6, 6, 5, 8, 5, 5, 6, 5, 5, 5,

In [143]:
# Carve a validation set (20%) out of the training split; same seed for repeatability.
x_train, x_valid, y_train, y_valid = train_test_split(
    train_x,
    train_y,
    test_size=0.2,
    random_state=42,
)

In [144]:
# Infer the MLflow model signature from the training features and targets.
signature = infer_signature(
    model_input=x_train,
    model_output=y_train,
)

In [145]:
# Column-wise mean of the training features.
mean = x_train.mean(axis=0)

In [146]:
# Display the column-wise means computed from x_train.
mean

array([6.87086790e+00, 2.77890874e-01, 3.33854499e-01, 6.36080089e+00,
       4.55105297e-02, 3.49082642e+01, 1.37556637e+02, 9.94029683e-01,
       3.19041800e+00, 4.89722399e-01, 1.05203340e+01])

In [147]:
def train_model(params,epochs,train_x,train_y,valid_x,valid_y,test_x,test_y):
    """Train a small Keras regressor and record the run in MLflow.

    The network is ``Input -> Normalization -> Dense(64, relu) -> Dense(1)``,
    optimised with SGD on mean squared error. Training, evaluation and logging
    all happen inside a nested MLflow run.

    Parameters
    ----------
    params : dict
        Hyperparameters; must contain ``"lr"`` (learning rate) and
        ``"momentum"`` for the SGD optimizer.
    epochs : int
        Number of training epochs passed to ``model.fit``.
    train_x, train_y : array-like
        Training features and targets. The normalization statistics are
        computed from ``train_x``.
    valid_x, valid_y : array-like
        Validation features and targets, used both during fitting and for
        the reported evaluation metric.
    test_x, test_y : array-like
        Currently unused; kept so existing callers keep working.

    Returns
    -------
    dict
        ``{"loss": <validation RMSE>, "status": STATUS_OK, "model": <model>}``.

    Notes
    -----
    The logged model uses the notebook-level ``signature`` object.
    """
    # Use the data passed in by the caller rather than notebook globals, so the
    # function trains on exactly what it is given.
    feature_mean = np.mean(train_x, axis=0)
    feature_var = np.var(train_x, axis=0)

    model = keras.Sequential([
        keras.Input([train_x.shape[1]]),
        keras.layers.Normalization(mean=feature_mean, variance=feature_var),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(1),
    ])
    model.compile(
        optimizer=keras.optimizers.SGD(
            learning_rate=params['lr'],
            momentum=params['momentum'],
        ),
        loss="mean_squared_error",
        metrics=[keras.metrics.RootMeanSquaredError()],
    )

    # Train the model with the data inside a nested MLflow run.
    with mlflow.start_run(nested=True):
        model.fit(
            train_x,
            train_y,
            validation_data=(valid_x, valid_y),
            epochs=epochs,
            batch_size=64,
        )

        # Evaluate the model; index 1 is the first compiled metric (RMSE),
        # index 0 is the loss.
        eval_result = model.evaluate(valid_x, valid_y, batch_size=64)
        eval_rmse = eval_result[1]

        # Log the parameters and the validation metric.
        mlflow.log_params(params)
        mlflow.log_metric("eval_rmse", eval_rmse)

        # Log the model.
        mlflow.tensorflow.log_model(model, "model", signature=signature)

        return {"loss": eval_rmse, "status": STATUS_OK, "model": model}


In [148]:
def objective(params):
    """Hyperopt objective: run one 3-epoch training trial with ``params``.

    Forwards the notebook's train/validation/test arrays to ``train_model``
    and returns its result dict unchanged.
    """
    datasets = dict(
        train_x=x_train,
        train_y=y_train,
        valid_x=x_valid,
        valid_y=y_valid,
        test_x=test_x,
        test_y=test_y,
    )
    return train_model(params, epochs=3, **datasets)

In [149]:
# Hyperopt search space: log-uniform learning rate in [1e-5, 1e-1] and
# uniform momentum in [0, 1].
lr_low, lr_high = np.log(1e-5), np.log(1e-1)
space = dict(
    lr=hp.loguniform("lr", lr_low, lr_high),
    momentum=hp.uniform("momentum", 0.0, 1.0),
)

In [150]:
mlflow.set_experiment("/wine-quality")

# Parent run for the hyperparameter search; fmin calls `objective` once per trial.
with mlflow.start_run():
    trails = Trials()
    best = fmin(
        fn=objective,
        space=space,
        algo=tpe.suggest,
        max_evals=4,
        trials=trails,
    )

    # Trial result with the lowest loss (the validation RMSE).
    best_run = min(trails.results, key=lambda result: result["loss"])

    mlflow.log_params(best)
    # Use the same metric key as the per-trial runs ("eval_rmse") so the parent
    # run and its trials line up in MLflow.
    mlflow.log_metric("eval_rmse", best_run["loss"])
    mlflow.tensorflow.log_model(best_run["model"], "model", signature=signature)

    print(f'Best Parameters:{best}')
    print(f'Best Eval RMSE:{best_run["loss"]}')

    



Epoch 1/3                                            

[1m 1/49[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m31s[0m 656ms/step - loss: 34.9038 - root_mean_squared_error: 5.9079
[1m 2/49[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 4ms/step - loss: 34.2752 - root_mean_squared_error: 5.8528
[1m 3/49[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 3ms/step - loss: 34.2571 - root_mean_squared_error: 5.8528
[1m 4/49[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 4ms/step - loss: 34.1519 - root_mean_squared_error: 5.8438
[1m 5/49[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 3ms/step - loss: 34.1554 - root_mean_squared_error: 5.8441   
[1m28/49[0m [32m━━━━━━━━━━━[0m[37m━━━━━━━━━[0m [1m0s[0m 2ms/step - loss: 33.8082 - root_mean_squared_error: 5.8135
[1m30/49[0m [32m━━━━━━━━━━━━[0m[37m━━━━━━━━[0m [1m0s[0m 2ms/step - loss: 33.7871 - root_mean_squared_error: 5.8126
[1m29/49[0m [32m━━━━━━━━━━━[0m[37m━━━━━━━━━[0m [1m0s[0m 2ms/step - loss: 33.7971 - root_