In [22]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics
from datetime import datetime
import joblib
from feast import FeatureStore
from features import entity, iris_features

In [17]:
!pip install "numpy<2.2" "pyarrow==17.0.0" --force-reinstall

Collecting numpy<2.2
  Using cached numpy-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (62 kB)
Collecting pyarrow==17.0.0
  Using cached pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (3.3 kB)
Using cached pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl (39.9 MB)
Using cached numpy-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.3 MB)
Installing collected packages: numpy, pyarrow
[2K  Attempting uninstall: numpy
[2K    Found existing installation: numpy 2.1.3
[2K    Uninstalling numpy-2.1.3:
[2K      Successfully uninstalled numpy-2.1.3
[2K  Attempting uninstall: pyarrow━━━━━━━━━━━━━━━━━[0m [32m0/2[0m [numpy]
[2K    Found existing installation: pyarrow 21.0.0m [32m0/2[0m [numpy]
[2K    Uninstalling pyarrow-21.0.0:━━━━━━━━━━━━[0m [32m0/2[0m [numpy]
[2K      Successfully uninstalled pyarrow-21.0.0[0m [32m0/2[0m [numpy]
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2/2[0m [pyarrow]m1/2

In [24]:
!pip install feast



In [20]:
# Print the installed Feast SDK version to confirm which release is in use.
!feast version

[1m[34mFeast SDK Version: [1m[32m"0.54.0"


In [21]:
# Name of the Cloud Storage bucket that the `gsutil mb` cell creates.
# The trailing `@param` marker is a notebook form-field annotation, not runtime code.
BUCKET_NAME = "graphite-dynamo-473907-c1_iris"  # @param {type:"string"}

In [11]:
# Create the bucket named by BUCKET_NAME (IPython expands `$BUCKET_NAME` from the kernel namespace).
# NOTE(review): `gsutil mb` presumably fails when the bucket already exists, so this
# cell would not survive a second run — confirm before relying on Run All.
! gsutil mb gs://$BUCKET_NAME

Creating gs://graphite-dynamo-473907-c1_iris/...


In [23]:
# Load the raw iris table and add the bookkeeping columns Feast needs.
data = pd.read_csv('data/iris.csv')

# Read the clock once so every row carries the same event/created timestamp;
# two separate now() calls would differ by a few microseconds.
snapshot_time = datetime.now()
data['event_timestamp'] = snapshot_time
data['created_timestamp'] = snapshot_time
# Sequential surrogate key; the later cells look rows up by this `iris_id`.
data['iris_id'] = range(len(data))

# Persist the augmented table in both formats: a CSV copy and a Parquet copy.
data.to_csv('data/iris_feast.csv', index=False)
data.to_parquet('data/iris_feast.parquet', index=False)

In [24]:
# Open the feature repository in the current directory and register the
# entity and feature view imported from `features`.
store = FeatureStore(repo_path="./")
store.apply([entity, iris_features])

# Push features up to the current time into the online store.
materialize_until = pd.Timestamp.now()
store.materialize_incremental(end_date=materialize_until)

Materializing [1m[32m1[0m feature views to [1m[32m2025-10-12 11:53:49+00:00[0m into the [1m[32msqlite[0m online store.

[1m[32miris_features[0m from [1m[32m2025-10-12 11:34:04+00:00[0m to [1m[32m2025-10-12 11:53:49+00:00[0m:


In [25]:
# Columns the model is trained on, and the label column.
feature_columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
label_column = 'species'

# Build the entity frame from the ids in `data` (defined in the data-preparation
# cell) instead of a hard-coded range(150), which goes stale once rows are appended.
entity_df = data[['iris_id']].assign(event_timestamp=pd.Timestamp.now())

# Point-in-time lookup of every feature plus the label for each entity id.
training_df = store.get_historical_features(
    entity_df=entity_df,
    features=[f"iris_features:{name}" for name in feature_columns + [label_column]],
).to_df()
X = training_df[feature_columns]
y = training_df[label_column]

# Split data (stratified on the label; fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, stratify=y, random_state=42
)

# Train model
mod_dt = DecisionTreeClassifier(max_depth=3, random_state=1)
mod_dt.fit(X_train, y_train)

# Score on the held-out split so the test partition is actually used.
test_accuracy = metrics.accuracy_score(y_test, mod_dt.predict(X_test))
print(f"Hold-out accuracy: {test_accuracy:.3f}")

# Save model (kept as the last expression so the written path is displayed)
joblib.dump(mod_dt, "artifacts/model.joblib")

['artifacts/model.joblib']

In [26]:
def predict_iris(iris_id: int):
    """Predict the species for one iris using its online-store features.

    Args:
        iris_id: Entity key of the row to look up in the Feast online store.

    Returns:
        The label predicted by the model stored at ``artifacts/model.joblib``.

    Raises:
        ValueError: If the online store returns no value for one or more of
            the requested features for this ``iris_id``.
    """
    feature_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
    store = FeatureStore("./")

    # Get features from online store. The lookup is keyed by the entity id
    # only, so the entity row carries nothing but `iris_id`.
    features = store.get_online_features(
        features=[f"iris_features:{name}" for name in feature_names],
        entity_rows=[{"iris_id": iris_id}],
    ).to_dict()

    # Fail loudly instead of handing missing values to the model.
    missing = [name for name in feature_names if features[name][0] is None]
    if missing:
        raise ValueError(
            f"No online features for iris_id={iris_id}: missing {missing}"
        )

    # Single-row frame with the same column order used at training time.
    feature_df = pd.DataFrame([{name: features[name][0] for name in feature_names}])

    # Load model and predict
    model = joblib.load("artifacts/model.joblib")
    prediction = model.predict(feature_df)

    return prediction[0]

In [30]:
# Smoke test: fetch the online features for entity 33 and print the predicted label.
print(predict_iris(33))

setosa


In [None]:
Bash commands for the one-time workspace setup:
# Initialise DVC and create the directories it will track.
dvc init
mkdir data
mkdir artifacts
# Register two GCS remotes; `-d` marks data-remote as the default.
dvc remote add -d data-remote gs://trainingdata_mlops_w1_graphite-dynamo-473907-c1/
dvc remote add model-remote gs://artifacts-mlops-graphite-dynamo-473907-c1/
# Track both directories with DVC and stage the generated metadata in git.
dvc add data/
dvc add artifacts/
git add data.dvc artifacts.dvc .dvc/.gitignore .dvc/config
# NOTE(review): the second remote was added as `model-remote`, but this line names
# `models-remote`, and the trailing `artifacts/` argument looks unexpected for
# `dvc remote default` — confirm the intended command.
dvc remote default models-remote --local artifacts/
dvc push
# Set the git identity, commit everything, and push to GitHub.
git config --global user.email "varun.van06@gmail.com"
git config --global user.name "vnv06"
git add .
git commit -m 'message'
# Replace the placeholders before running.
# NOTE(review): a token embedded in the remote URL is presumably kept in plain
# text in the git config — consider a credential helper instead.
git remote set-url origin https://YOUR_GITHUB_USERNAME:YOUR_TOKEN@github.com/YOUR_USERNAME/MLOps_Iris.git
git push -u origin main

In [None]:
Bash steps to run afterwards (fresh clone, then append new data):
# Clone the project and fetch the DVC-tracked files.
git clone https://github.com/vnv06/MLOps_Iris
cd MLOps_Iris
dvc pull
# Manual step: upload v1.csv into the repository root before continuing.
# NOTE(review): `cat` appends the file verbatim, so a header row in v1.csv
# would end up as a data row in iris.csv — confirm v1.csv has no header.
cat v1.csv >> data/iris.csv
# Re-track the changed data directory and push the new version.
dvc add data/
dvc push