# Day 08: Data Processing

---


In [1]:
# Core libraries
import pandas as pd
import grama as gr
DF = gr.Intention()

# Visualization-specific stuff
import plotly.graph_objects as go
from dash import Dash, dcc, html, Input, Output, no_update

## Grama vocab

![](../images/verb-classes-bw.png)


# Data Processing

## Load the data

Load from a public Google sheet:


In [2]:
# Address of the public Google Sheet holding the raw form responses.
#   The trailing /export?git=0&format=csv is what makes Google serve the
#   sheet as a CSV download instead of the interactive spreadsheet page.
#   NOTE(review): the sheet-selector parameter is normally spelled `gid`;
#   `git` looks like a typo -- confirm which sheet is intended before changing.
url = "https://docs.google.com/spreadsheets/d/1AuuzUtKOSvqpNKSY8FJp-1r6baw5ignI7xs_UJ1s9dc/export?git=0&format=csv"

# pandas accepts a URL directly: it downloads the CSV and parses it
# into a DataFrame in one step
df_raw = pd.read_csv(filepath_or_buffer=url)
df_raw.head(n=5)

Unnamed: 0,Timestamp,Schematic - your whiteboard drawing before implementing,Rubber bands - how many?,Metal brackets - how many?,Displacement - Distance from top? (cm),"Implementation - image of your implementation on the pegboard, make sure top of board and top of weight are visible"
0,1/17/2026 8:23:21,https://drive.google.com/open?id=1XScFH9fZEwgQ...,1,1,14.1,https://drive.google.com/open?id=1QYRRkyi6x6i0...
1,1/23/2026 8:42:26,https://drive.google.com/open?id=1y9CG7MlbN3De...,1,1,1.0,https://drive.google.com/open?id=1r_WAH6nw7Sa2...
2,1/23/2026 8:45:10,https://drive.google.com/open?id=1tbYwznlKKSd6...,1,2,6.5,https://drive.google.com/open?id=14C-pThpwlVwC...
3,1/23/2026 8:46:16,https://drive.google.com/open?id=1mbaQeFj45JpE...,1,1,2.5,https://drive.google.com/open?id=1LWhuIOy5qsIE...
4,1/23/2026 8:46:27,https://drive.google.com/open?id=1l3AjUP-4dq2g...,1,2,8.8,https://drive.google.com/open?id=1UUqUjNQqgJwE...


## Wrangle the data

"Wrangling" is processing data to make it more usable. We:

- Rename the columns to make them easier to work with
- Edit the provided URL to make it play nicely with our plotting code:
  - Rather than `"open"` the image, we'll display it as a `"thumbnail"`
  - We'll add a URL query parameter to specify the image size (`"sz"`)

Some grama concepts to highlight:

- We use `>>` to specify verbs in a data pipeline
- We use `DF` in a data pipeline to refer to the data that we are currently processing.
  - This is needed to apply multiple edits to a dataset (`gr.tf_mutate()` below).


In [3]:
# Wrangle data

# Short, code-friendly column names (keys) mapped to the original
# survey-question headers (values)
column_renames = dict(
    img_schematic="Schematic - your whiteboard drawing before implementing",
    img_implement="Implementation - image of your implementation on the pegboard, make sure top of board and top of weight are visible",
    n_bands="Rubber bands - how many?",
    n_metal="Metal brackets - how many?",
    disp="Displacement - Distance from top? (cm)",
)

df_data = (
    df_raw
    # Step 1: swap the long survey headers for the short names above
    >> gr.tf_rename(**column_renames)
    # Step 2: point the implementation link at Drive's "thumbnail" endpoint
    # rather than "open", so the URL can be used as an image source
    >> gr.tf_mutate(
        img_implement=gr.str_replace(DF.img_implement, "open", "thumbnail"),
    )
    # Step 3: append the size parameter to the thumbnail URL
    >> gr.tf_mutate(
        img_implement=gr.str_c(DF.img_implement, "&sz=w1000"),
    )
)
df_data.head()

Unnamed: 0,Timestamp,img_schematic,n_bands,n_metal,disp,img_implement
0,1/17/2026 8:23:21,https://drive.google.com/open?id=1XScFH9fZEwgQ...,1,1,14.1,https://drive.google.com/thumbnail?id=1QYRRkyi...
1,1/23/2026 8:42:26,https://drive.google.com/open?id=1y9CG7MlbN3De...,1,1,1.0,https://drive.google.com/thumbnail?id=1r_WAH6n...
2,1/23/2026 8:45:10,https://drive.google.com/open?id=1tbYwznlKKSd6...,1,2,6.5,https://drive.google.com/thumbnail?id=14C-pThp...
3,1/23/2026 8:46:16,https://drive.google.com/open?id=1mbaQeFj45JpE...,1,1,2.5,https://drive.google.com/thumbnail?id=1LWhuIOy...
4,1/23/2026 8:46:27,https://drive.google.com/open?id=1l3AjUP-4dq2g...,1,2,8.8,https://drive.google.com/thumbnail?id=1UUqUjNQ...


## Search

We can search through a dataset to find cases that are "interesting". For instance, if we arrange the data from least to most `disp`, the designs with the smallest displacement (those that optimized displacement the most) appear at the top. The following code displays the 6 rows with the smallest `disp`.


In [4]:
# Sort the rows by ascending `disp`, then keep only the first six
df_data >> gr.tf_arrange(DF.disp) >> gr.tf_head(6)

Unnamed: 0,Timestamp,img_schematic,n_bands,n_metal,disp,img_implement
0,1/23/2026 9:06:54,https://drive.google.com/open?id=1XBPUxKRlZJAI...,10,1,-12.0,https://drive.google.com/thumbnail?id=1DJM4R4w...
1,1/23/2026 9:00:55,https://drive.google.com/open?id=1t7f-E01Wz0FB...,1,1,-8.0,https://drive.google.com/thumbnail?id=1zC7brnX...
2,1/23/2026 9:11:30,https://drive.google.com/open?id=1XqI-4IqL0qqc...,2,1,-6.5,https://drive.google.com/thumbnail?id=1feXn9nq...
3,1/23/2026 9:11:05,https://drive.google.com/open?id=1OQQ_-lmpp75G...,3,2,-2.23,https://drive.google.com/thumbnail?id=1Ba12HN5...
4,1/23/2026 8:59:45,https://drive.google.com/open?id=1L-YTm4GzG-2R...,2,2,0.0,https://drive.google.com/thumbnail?id=1k89EqQV...
5,1/23/2026 8:51:49,https://drive.google.com/open?id=1kSgVh5Xmip0C...,1,1,0.05,https://drive.google.com/thumbnail?id=16RgE5TA...


## Visualize

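This code is quite a bit more complex, but really helpful for exploring the data. It builds an interactive scatterplot of displacement against the number of rubber bands; hovering over a point shows the photo of that design's implementation.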


In [5]:
# Marker appearance shared by every point in the scatter.
# NOTE(review): no `size` is given, so `sizeref` likely has no visible
# effect here -- confirm against the plotly marker reference.
marker_style = dict(
    color="black",
    line={"color": "#444"},
    sizeref=45,
    opacity=0.8,
)

# One point per submission: number of bands (x) against displacement (y)
scatter = go.Scatter(
    x=df_data["n_bands"],
    y=df_data["disp"],
    mode="markers",
    marker=marker_style,
)
fig = go.Figure(data=[scatter])

# Disable plotly.js's built-in hover labels. Use hoverinfo="none" rather
# than "skip": "skip" would also stop the hover events from firing.
fig.update_traces(hoverinfo="none", hovertemplate=None)

# Readable default font, axis titles, and a near-transparent white background
fig.update_layout(
    font=dict(family="Arial", size=18, color="Black"),
    xaxis=dict(title='Bands (#)'),
    yaxis=dict(title='Displacement (cm)'),
    plot_bgcolor='rgba(255,255,255,0.1)',
)

# Dash application that hosts the interactive figure
app = Dash(__name__)

# Page layout: the scatterplot plus a tooltip component.
# The ids given here are the ones the hover callback's Input/Output refer to.
app.layout = html.Div(
    children=[
        dcc.Graph(id="graph-basic-2", figure=fig, clear_on_unhover=True),
        dcc.Tooltip(id="graph-tooltip"),
    ]
)


@app.callback(
    Output("graph-tooltip", "show"),
    Output("graph-tooltip", "bbox"),
    Output("graph-tooltip", "children"),
    Input("graph-basic-2", "hoverData"),
)
def display_hover(hoverData):
    """Fill the tooltip with the implementation photo of the hovered point.

    Args:
        hoverData: value of the "graph-basic-2" graph's `hoverData`
            property, or None, in which case the tooltip is hidden.

    Returns:
        A 3-tuple feeding the "graph-tooltip" component's `show`, `bbox`
        and `children` properties, in that order.
    """
    # Nothing to show: hide the tooltip and leave its other properties alone
    if hoverData is None:
        return False, no_update, no_update

    # Only the first reported point is used, even if several are available
    point = hoverData["points"][0]
    anchor_box = point["bbox"]
    row_idx = point["pointNumber"]

    # The point number is used as a positional index into df_data
    photo_url = df_data.iloc[row_idx]['img_implement']
    label = "ID: {0}".format(row_idx)

    # Tooltip body: the photo with the point's ID underneath
    card = html.Div(
        [
            html.Img(src=photo_url, style={"width": "100%"}),
            html.H2(label, style={"color": "darkblue"}),
        ],
        style={'width': '200px', 'white-space': 'normal'},
    )

    return True, anchor_box, [card]


# Start the Dash app only when this code runs as the main module;
# debug=True turns on Dash's debug mode.
if __name__ == "__main__":
    app.run(debug=True)