# Day 08: Data Processing

---


# Setup

Run the following code chunk if you want to run the interactive plot in the Visualize section.

In [None]:
!pip install dash

# Visualization-specific stuff
import plotly.graph_objects as go
from dash import Dash, dcc, html, Input, Output, no_update

In [None]:
# Core libraries
import pandas as pd
import grama as gr
# Placeholder that refers to "the data currently being processed" inside a grama pipeline
DF = gr.Intention()


## Grama vocab

![](../images/verb-classes-bw.png)


# Data Processing

## Load the data

Load from a public Google sheet:


In [None]:
# Address of the public Google sheet
#   The trailing /export?git=0&format=csv is what makes the import
#   work; keep it when swapping in a different sheet.
#   NOTE(review): `git=0` looks like a typo for `gid=0`; left unchanged
#   because the intended sheet id cannot be confirmed from here.
url = "https://docs.google.com/spreadsheets/d/1AuuzUtKOSvqpNKSY8FJp-1r6baw5ignI7xs_UJ1s9dc/export?git=0&format=csv"

# Hand the URL straight to pandas, which fetches the CSV
# and parses it into a DataFrame
df_raw = pd.read_csv(filepath_or_buffer=url)

# Peek at the first five rows
df_raw.head(n=5)

## Wrangle the data

"Wrangling" is processing data to make it more usable. We:

- Rename the columns to make them easier to work with
- Edit the provided URL to make it play nicely with our plotting code:
  - Rather than `"open"` the image, we'll display it as a `"thumbnail"`
  - We'll add a URL parameter (`"sz"`) to specify the image size

Some grama concepts to highlight:

- We use `>>` to specify verbs in a data pipeline
- We use `DF` in a data pipeline to refer to the data that we are currently processing.
  - This is needed to apply multiple edits to a dataset (`gr.tf_mutate()` below).


In [None]:
# Wrangle data
# Short column names, each mapped to the long survey header it replaces
_short_names = {
    "img_schematic": "Schematic - your whiteboard drawing before implementing",
    "img_implement": "Implementation - image of your implementation on the pegboard, make sure top of board and top of weight are visible",
    "n_bands": "Rubber bands - how many?",
    "n_metal": "Metal brackets - how many?",
    "disp": "Displacement - Distance from top? (cm)",
}

# Step 1: swap the long headers for the short names
df_data = df_raw >> gr.tf_rename(**_short_names)

# Step 2: request a "thumbnail" of each image rather than "open"-ing it
df_data = df_data >> gr.tf_mutate(
    img_implement=gr.str_replace(DF.img_implement, "open", "thumbnail")
)

# Step 3: append the image-size argument to the edited URL
df_data = df_data >> gr.tf_mutate(
    img_implement=gr.str_c(DF.img_implement, "&sz=w1000")
)

# Peek at the first five wrangled rows
df_data.head(n=5)

## Search

We can search through a dataset to find cases that are "interesting". For instance, if we arrange the data from least to most `disp`, the designs that optimized (minimized) displacement the most will appear first. The following code shows how to display the 6 rows with the smallest `disp`.


In [None]:
# Sort the rows from least to most `disp`, then keep only the first 6
df_data >> gr.tf_arrange(DF.disp) >> gr.tf_head(6)

## Visualize

This code is quite a bit more complex, but really helpful for exploring the data.


In [None]:
# Scatter of displacement against band count, one marker per row of df_data
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=df_data["n_bands"],
        y=df_data["disp"],
        mode="markers",
        marker={
            "color": "black",
            "line": {"color": "#444"},
            "sizeref": 45,
            "opacity": 0.8,
        },
    )
)

# Switch off plotly.js's native hover effects. Use hoverinfo="none"
# rather than "skip": "skip" would also halt the hover events.
fig.update_traces(hoverinfo="none", hovertemplate=None)

# Layout in a single pass: a readable default font, axis titles,
# and the plot background color
fig.update_layout(
    font={
        "family": "Arial",  # default font family
        "size": 18,         # default font size
        "color": "Black",
    },
    xaxis={"title": "Bands (#)"},
    yaxis={"title": "Displacement (cm)"},
    plot_bgcolor="rgba(255,255,255,0.1)",
)

# Dash app holding the graph and the tooltip component that is
# updated from the graph's hover data
app = Dash(__name__)
app.layout = html.Div(
    children=[
        dcc.Graph(id="graph-basic-2", figure=fig, clear_on_unhover=True),
        dcc.Tooltip(id="graph-tooltip"),
    ]
)


@app.callback(
    Output("graph-tooltip", "show"),
    Output("graph-tooltip", "bbox"),
    Output("graph-tooltip", "children"),
    Input("graph-basic-2", "hoverData"),
)
def display_hover(hoverData):
    """Build the image tooltip for the scatter point being hovered.

    Args:
        hoverData: The ``hoverData`` property of the ``graph-basic-2``
            graph, or ``None`` when there is nothing to show.

    Returns:
        A ``(show, bbox, children)`` tuple for the ``graph-tooltip``
        component. When ``hoverData`` is ``None`` the tooltip is hidden
        (``show`` is ``False``) and the other two outputs are left
        untouched via ``no_update``.
    """
    nothing_hovered = hoverData is None
    if nothing_hovered:
        return False, no_update, no_update

    # Only the first hovered point is used, though others may be present
    point = hoverData["points"][0]
    box = point["bbox"]
    row_index = point["pointNumber"]

    # The point number doubles as the row position in df_data
    image_url = df_data.iloc[row_index]["img_implement"]
    label = f"ID: {row_index}"

    # Tooltip content: the implementation image above its ID label
    card = html.Div(
        [
            html.Img(src=image_url, style={"width": "100%"}),
            html.H2(label, style={"color": "darkblue"}),
        ],
        style={"width": "200px", "white-space": "normal"},
    )

    return True, box, [card]


# Start the Dash app (with debug mode on) only when this file is run
# as the main module
if __name__ == "__main__":
    app.run(debug=True)