## Working with the Lazy API

- https://pola-rs.github.io/polars-book/user-guide/quickstart/intro.html?highlight=lazy#lazy-quick-start

In [17]:
import polars as pl

# to enrich the examples in this quickstart with dates
from datetime import datetime, timedelta 
# to generate data for the examples
import numpy as np 

In [18]:
# Load the iris dataset used by every example below into an in-memory frame.
# The CSV is fetched over HTTPS through a shortened URL, so this cell needs
# network access when the notebook is re-run.
df = pl.read_csv("https://j.mp/iriscsv")

### Eager

In [19]:
# Eager query on the in-memory frame: keep rows with sepal_length above 5,
# group them by species (groups keep their first-seen order), then sum
# every remaining column within each group.
(
    df.filter(pl.col("sepal_length") > 5)
    .groupby("species", maintain_order=True)
    .agg(pl.all().sum())
)

species,sepal_length,sepal_width,petal_length,petal_width
str,f64,f64,f64,f64
"""setosa""",116.9,81.7,33.2,6.1
"""versicolor""",281.9,131.8,202.9,63.3
"""virginica""",324.5,146.2,273.1,99.6


### Lazy

Sandwich the query between two calls:
- `.lazy()` to begin the query
- `.collect()` at the end to execute it

In [20]:
# Same filter / group / sum query as the eager example, but wrapped between
# .lazy() (start of the query) and .collect() (end of the query, which
# yields the result table displayed below).
(
    df.lazy()
    .filter(pl.col("sepal_length") > 5)
    .groupby("species", maintain_order=True)
    .agg(pl.all().sum())
    .collect()
)

species,sepal_length,sepal_width,petal_length,petal_width
str,f64,f64,f64,f64
"""setosa""",116.9,81.7,33.2,6.1
"""versicolor""",281.9,131.8,202.9,63.3
"""virginica""",324.5,146.2,273.1,99.6


#### Use `pipe()`

In [21]:
def df_op_1(df):
    """Sum every column per species for rows whose sepal_length exceeds 5.

    Packages the filter -> groupby -> agg steps as a single reusable stage
    so it can be handed to ``.pipe()``.

    Args:
        df: Frame to query. It must provide ``filter``, ``groupby`` and
            ``agg``; the notebook passes in the result of ``df.lazy()``.

    Returns:
        Whatever ``agg`` returns for the given frame. Nothing is collected
        here; the caller is responsible for calling ``.collect()``.
    """
    # Row predicate: only keep observations with sepal_length above 5.
    long_sepal = pl.col("sepal_length") > 5
    # maintain_order=True keeps the groups in their first-seen order.
    by_species = df.filter(long_sepal).groupby("species", maintain_order=True)
    return by_species.agg(pl.all().sum())

In [22]:
# Run the reusable df_op_1 stage through .pipe() inside a lazy query; the
# displayed table matches the hand-written lazy chain above.
(
    df.lazy()
    .pipe(df_op_1)
    .collect()
)

species,sepal_length,sepal_width,petal_length,petal_width
str,f64,f64,f64,f64
"""setosa""",116.9,81.7,33.2,6.1
"""versicolor""",281.9,131.8,202.9,63.3
"""virginica""",324.5,146.2,273.1,99.6
