## Import Python libs and modules

In [None]:
# Standard library and 3rd party imports
from datetime import datetime, timedelta
from pprint import pprint
import pandas as pd
from feast import (FeatureStore,
                    FileSource,
                    FeatureService,
                    FeatureView,
                    Field,
                    Entity)
from feast.types import  Float32, Int32

## Define the Feast repo location and create its registry.

In [None]:
# Location of the Feast feature repository used throughout this notebook.
# Set the FEAST_REPO environment variable to point at the right path for your
# environment; the literal below is only the fallback used when the variable
# is unset or empty.
import os

FEAST_REPO = (
    os.environ.get("FEAST_REPO")
    or "/Users/kike/Library/CloudStorage/OneDrive-VMware,Inc/OCTO/2022-H1/Taurus/Feast/feast_workshops-master/module_1/feature_repo"
)

# Open the feature store rooted at that repo and show the configuration it loaded.
fs = FeatureStore(repo_path=FEAST_REPO)
pprint(fs.config.dict())

## Register the Data Source, the Entity, the features in the FeatureView, and the FeatureService with the Feast Registry

In [None]:
# Raw driver statistics backing the feature view. In a real deployment this
# data could be provided by Postgres or the Taurus DWH instead of a local file.
driver_stats = pd.read_parquet(f"{FEAST_REPO}/data/driver_stats.parquet")

# Show the rows ordered by their `datetime` column, newest first.
display(driver_stats.sort_values("datetime", ascending=False))

In [None]:
'''
Declare the data source, the entity, the feature view and the feature service,
then register all of them with the feature store.

The FileSource is created from a file containing feature data (only the Parquet
format is supported). Two of its parameters are used here:
- timestamp_field – Event timestamp field used for point-in-time joins of feature values.
- created_timestamp_column – Timestamp column recording when the row was created, used for deduplicating rows.
'''
driver_hourly_stats = FileSource(
    path=f"{FEAST_REPO}/data/driver_stats.parquet",
    timestamp_field="datetime",
    created_timestamp_column="created",
)

# The entity the features are keyed on.
# NOTE(review): value_type is given a feast.types primitive here; some Feast
# releases expect a feast.ValueType member instead - confirm against the
# installed Feast version.
driver = Entity(
    name="driver_id",
    value_type=Int32,
    description="driver id",
)

# Feature view exposing three columns of the source, keyed by driver_id.
driver_hourly_stats_view = FeatureView(
    name="driver_hourly_stats",
    entities=["driver_id"],
    ttl=timedelta(days=365),  # 1 year
    schema=[
        Field(name=column, dtype=feast_type)
        for column, feast_type in (
            ("conv_rate", Float32),
            ("acc_rate", Float32),
            ("avg_daily_trips", Int32),
        )
    ],
    online=True,
    source=driver_hourly_stats,
    tags={},
)

# Feature service bundling the view under the name used later for retrieval.
driver_feature_svc = FeatureService(
    name="driver_ranking_fv_svc",
    features=[driver_hourly_stats_view],
    tags={"description": "Features used to train a MLP model"},
)

# Register the entity, the feature service and the feature view with the store.
fs.apply([driver, driver_feature_svc, driver_hourly_stats_view])

## Get the historical training data

In [None]:
# Toy entity dataframe: one row per driver id, each paired with the event
# timestamp at which its features should be looked up. It carries no feature
# columns of its own.
entity_df = pd.DataFrame(
    data={
        "driver_id": [1001, 1002, 1003, 1004],
        "event_timestamp": [
            datetime(year=2021, month=4, day=12, hour=10, minute=59, second=42),
            datetime(year=2021, month=4, day=12, hour=8, minute=12, second=10),
            datetime(year=2021, month=4, day=12, hour=16, minute=40, second=26),
            datetime(year=2021, month=4, day=12, hour=15, minute=1, second=12),
        ],
    }
)
entity_df

## Point-in-time join
- Feast is able to join features from one or more feature views onto an entity data frame in a point-in-time correct way. This means Feast is able to reproduce the state of features at a specific point in the past.
- More info at [Feast point-in-time joins](http://docs.feast.dev/getting-started/concepts/point-in-time-joins)

In [None]:
'''
This step merges the historical features in the data store with the rows of
the entity dataframe. The resulting table is typically used for ML model
training purposes.
'''
# Look up the registered feature service that names the features to retrieve.
ranking_features = fs.get_feature_service("driver_ranking_fv_svc")

# Join those features onto the entity rows and materialize the result as a dataframe.
training_df = fs.get_historical_features(
    entity_df=entity_df,
    features=ranking_features,
).to_df()
display(training_df)

## Step 2: Materialize, i.e. load data from the offline store into the online store

In [None]:
'''
This step complements the previous one: it is normally executed to extract the
latest features from the offline store into the online store for inference
purposes. Here the materialization window covers a single calendar day,
from 2021-07-28 to 2021-07-29.
'''
fs.materialize(
    start_date=datetime(2021, 7, 28),
    end_date=datetime(2021, 7, 29),
)

## Step 3: Get the feature vector for inference from the online store

In [None]:
# Get a feature vector (inference) from the online store.
# A separate FeatureStore handle is opened on the same repo and the feature
# service is looked up by name.
inf_fs = FeatureStore(repo_path=FEAST_REPO)
inf_feature_service = inf_fs.get_feature_service("driver_ranking_fv_svc")

# The loop variable is named `driver_id` (not `driver`) so that it does not
# overwrite the module-level `driver` Entity object with a plain integer.
for driver_id in [1001, 1002, 1003]:
    # One online lookup per driver; the result is converted to a dataframe.
    feature_vector = inf_fs.get_online_features(
        entity_rows=[{"driver_id": driver_id}],
        features=inf_feature_service,
    ).to_df()
    print("--" * 5)
    pprint(feature_vector)