## MLflow: tracking and registering a house-price model

In [1]:
import pandas as pd

# Toy housing dataset: one list per column, five rows in total.
data = dict(
    Area=[1000, 1500, 2000, 2500, 3000],
    Bedrooms=[2, 3, 3, 4, 4],
    Price=[200000, 300000, 400000, 500000, 600000],
)

# Column-oriented dict -> DataFrame (keys become the column names).
df = pd.DataFrame.from_dict(data)

# Persist to disk without the index so the file holds only the three columns.
df.to_csv(path_or_buf="house_data.csv", index=False)
print("CSV file created successfully!")


CSV file created successfully!


In [5]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import mlflow
import mlflow.sklearn

# Model inputs and target, defined once so the columns used for training and
# the "features" param logged to MLflow cannot drift apart.
FEATURES = ["Area", "Bedrooms"]
TARGET = "Price"

# Split settings, named so they can be logged with the run for reproducibility.
TEST_SIZE = 0.2
RANDOM_STATE = 42

# Load data
df = pd.read_csv("house_data.csv")
X = df[FEATURES]
y = df[TARGET]

# Split data
# NOTE(review): if house_data.csv is the 5-row sample file, a 0.2 split leaves
# a single test row, so the logged mse reflects one prediction only.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# Set MLflow experiment (will create one if not exists)
mlflow.set_experiment("HousePriceExperiment")

# Everything logged inside this block is attached to one MLflow run.
with mlflow.start_run():

    # Train model
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Predict on the held-out rows and evaluate
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)

    # Log parameters and metric; the split settings are recorded alongside the
    # model description so the metric can be traced back to how it was produced.
    mlflow.log_param("model_type", "LinearRegression")
    mlflow.log_param("features", FEATURES)
    mlflow.log_param("test_size", TEST_SIZE)
    mlflow.log_param("random_state", RANDOM_STATE)
    mlflow.log_metric("mse", mse)

    # Log the fitted model as a run artifact and register it; when the
    # registered name already exists, MLflow creates a new version of it.
    mlflow.sklearn.log_model(
        sk_model=model,
        artifact_path="model",
        registered_model_name="HousePriceModel"
    )

    print("Model trained, logged, and registered!")


2025/07/14 12:19:03 INFO mlflow.tracking.fluent: Experiment with name 'HousePriceExperiment' does not exist. Creating a new experiment.


Model trained, logged, and registered!


Registered model 'HousePriceModel' already exists. Creating a new version of this model...
Created version '4' of model 'HousePriceModel'.
