<a href="https://colab.research.google.com/github/xhlulu/Bixi-ML-Analysis/blob/master/apps/dash-cuml-umap/Dash_cuML_UMAP_Colab_Demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

To start this Jupyter Dash app, please run all the cells below. Then, click on the **temporary** URL at the end of the last cell to open the app.

By running this notebook, you agree to the terms and conditions of Kaggle.com, as well as the licenses specified in the original dataset: https://www.kaggle.com/mlg-ulb/creditcardfraud

In [None]:
# Install RAPIDS
!git clone https://github.com/rapidsai/rapidsai-csp-utils.git
!bash rapidsai-csp-utils/colab/rapids-colab.sh stable

import sys, os

dist_package_index = sys.path.index('/usr/local/lib/python3.6/dist-packages')
sys.path = sys.path[:dist_package_index] + ['/usr/local/lib/python3.6/site-packages'] + sys.path[dist_package_index:]
sys.path
exec(open('rapidsai-csp-utils/colab/update_modules.py').read(), globals())

In [None]:
!pip install -q jupyter-dash==0.3.0rc1 dash-bootstrap-components

In [None]:
# Download the credit card transactions CSV into the working directory.
# -nc (no-clobber) skips the download when the file is already present,
# so re-running this cell is cheap.
!wget -nc https://plotly-tutorials.s3-us-west-1.amazonaws.com/dash-sample-apps/creditcard.csv

In [None]:
import os
import time

import cudf
import cuml
import cupy as cp
import dash
import dash_html_components as html
import dash_core_components as dcc
import dash_bootstrap_components as dbc
from dash.dependencies import Input, Output, State
from jupyter_dash import JupyterDash
import plotly.express as px

In [None]:
# Read the transactions CSV into a cuDF DataFrame.
# DATA_DIR may name the folder holding the file; when it is unset the path
# is just "creditcard.csv", i.e. relative to the working directory.
data_dir = os.environ.get("DATA_DIR", "")
path = os.path.join(data_dir, "creditcard.csv")
gdf = cudf.read_csv(path)

# Rescale Time by 1/3600 so it lines up with the "Time since start (h)"
# slider (presumably seconds -> hours; confirm against the dataset docs).
gdf["Time"] = gdf["Time"] / 3600

# Cap Amount at 500, the upper end of the amount slider, so larger
# transactions remain selectable at the slider's maximum.
over_cap = gdf["Amount"] > 500
gdf.loc[over_cap, "Amount"] = 500

In [None]:
# Define app
app = JupyterDash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
server = app.server


def _range_slider_col(label, slider_id, maximum, step, value, mark_every):
    """Build one ``md=6`` column containing a labelled ``dcc.RangeSlider``.

    Args:
        label: Text displayed above the slider.
        slider_id: Component id that callbacks use to read the slider value.
        maximum: Upper bound of the slider; the lower bound is always 0.
        step: Increment between selectable slider positions.
        value: Initial ``[low, high]`` selection.
        mark_every: Spacing between the tick labels, from 0 up to ``maximum``.

    Returns:
        A ``dbc.Col`` wrapping a ``dbc.FormGroup`` with the label and slider.
    """
    return dbc.Col(
        dbc.FormGroup(
            [
                dbc.Label(label),
                dcc.RangeSlider(
                    id=slider_id,
                    min=0,
                    max=maximum,
                    step=step,
                    value=value,
                    # maximum + 1 so the last tick lands on the maximum itself
                    marks={i: str(i) for i in range(0, maximum + 1, mark_every)},
                ),
            ]
        ),
        md=6,
    )


# The two filters shown side by side above the graph
controls = dbc.Row(
    [
        _range_slider_col(
            "Time since start (h)",
            "slider-hours",
            maximum=50,
            step=1,
            value=[20, 30],
            mark_every=10,
        ),
        _range_slider_col(
            "Transaction Amount ($)",
            "slider-amount",
            maximum=500,
            step=5,
            value=[200, 300],
            mark_every=100,
        ),
    ],
)


# Define Layout
# Style dictionaries use camelCased keys (maxHeight, maxWidth), which is the
# form Dash documents for the `style` property.
app.layout = dbc.Container(
    fluid=True,
    children=[
        html.H1("Dash cuML UMAP Demo"),
        html.Hr(),
        dbc.Card(controls, body=True),
        dcc.Graph(id="graph-umap", style={"height": "70vh", "maxHeight": "90vw"}),
        # Filled by the callback with a status alert after each projection
        html.Div(id="output-info"),
    ],
    style={"maxWidth": "960px", "margin": "auto"},
)

In [None]:
@app.callback(
    [Output("graph-umap", "figure"), Output("output-info", "children")],
    [Input("slider-amount", "value"), Input("slider-hours", "value"),],
)
def update_graph(amt, hrs):
    """Project the transactions selected by the sliders with cuML UMAP.

    Args:
        amt: ``[low, high]`` value of the ``slider-amount`` range slider.
        hrs: ``[low, high]`` value of the ``slider-hours`` range slider.

    Returns:
        A ``(figure, alert)`` pair for the ``graph-umap`` figure and the
        ``output-info`` children. When no transaction matches the selection,
        the figure is left untouched (``dash.no_update``) and the alert is a
        warning instead of the success message.
    """
    t0 = time.time()
    # First, filter based on the slider values (both bounds are inclusive)
    time_mask = (gdf.Time >= hrs[0]) & (gdf.Time <= hrs[1])
    amount_mask = (gdf.Amount >= amt[0]) & (gdf.Amount <= amt[1])
    filt_df = gdf.loc[time_mask & amount_mask]

    # An empty selection leaves nothing to fit a UMAP model on, so report it
    # to the user instead of letting the callback fail.
    if len(filt_df) == 0:
        warning = dbc.Alert(
            "No transactions match the selected time and amount ranges.",
            color="warning",
            dismissable=True,
        )
        return dash.no_update, warning

    # Then, select the features and train a UMAP model with cuML
    features = filt_df.loc[:, "V1":"V28"].values
    reducer = cuml.UMAP()
    embedding = reducer.fit_transform(features)

    # Convert the embedding back to numpy
    embedding = cp.asnumpy(embedding)
    amount = cp.asnumpy(filt_df.Amount.values.round(2))

    # Create a plotly.express scatter plot, coloured by transaction amount
    fig = px.scatter(
        x=embedding[:, 0],
        y=embedding[:, 1],
        color=amount,
        labels={"color": "Amount ($)"},
        title="UMAP projection of credit card transactions",
    )

    # The reported time covers filtering, fitting and figure construction
    t1 = time.time()
    out_msg = f"Projected {embedding.shape[0]} transactions in {t1-t0:.2f}s."
    alert = dbc.Alert(out_msg, color="success", dismissable=True)

    return fig, alert

In [None]:
# Start the app; open it through the temporary URL shown in this cell's output.
app.run_server(mode='external')