# Lazy mode

In [1]:
import polars as pl

In [2]:
# Relative path to the Titanic CSV; the read_csv / scan_csv calls in this
# notebook all load their data through this one name.
csv_file = "data/titanic.csv"

In [3]:
# Eager mode: read the CSV into a DataFrame, then take the mean Age per Pclass.
pl.read_csv(csv_file).group_by("Pclass").agg(pl.col("Age").mean())

Pclass,Age
i64,f64
2,29.87763
3,25.14062
1,38.233441


In [4]:
# Lazy mode: the same aggregation built on scan_csv; collect() executes the
# query and returns the result.
(
    pl.scan_csv(csv_file)
    .group_by("Pclass")
    .agg(pl.col("Age").mean())
    .collect()
)

Pclass,Age
i64,f64
3,25.14062
2,29.87763
1,38.233441


In [5]:
# Eager load: read_csv produces the DataFrame used by the eager examples.
df_eager = pl.read_csv(csv_file)
# Preview the first two rows.
df_eager.head(2)

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str
1,0,3,"""Braund, Mr. Owen Harris""","""male""",22.0,1,0,"""A/5 21171""",7.25,,"""S"""
2,1,1,"""Cumings, Mrs. John Bradley (Fl…","""female""",38.0,1,0,"""PC 17599""",71.2833,"""C85""","""C"""


In [6]:
# Lazy counterpart of df_eager: built with scan_csv instead of read_csv.
df_lazy = pl.scan_csv(csv_file)
# Bare name as the last expression so the notebook displays the object.
df_lazy

In [7]:
# Limit the lazy query to its first three rows, then collect to get a result
# that can be displayed.
df_lazy.head(3).collect()

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str
1,0,3,"""Braund, Mr. Owen Harris""","""male""",22.0,1,0,"""A/5 21171""",7.25,,"""S"""
2,1,1,"""Cumings, Mrs. John Bradley (Fl…","""female""",38.0,1,0,"""PC 17599""",71.2833,"""C85""","""C"""
3,1,3,"""Heikkinen, Miss. Laina""","""female""",26.0,0,0,"""STON/O2. 3101282""",7.925,,"""S"""


In [8]:
# Show the class of the object that collect() returns for a lazy query.
print(type(df_lazy.head(3).collect()))

<class 'polars.dataframe.frame.DataFrame'>


In [9]:
# Column names and dtypes of the eager DataFrame.
df_eager.schema

Schema([('PassengerId', Int64),
        ('Survived', Int64),
        ('Pclass', Int64),
        ('Name', String),
        ('Sex', String),
        ('Age', Float64),
        ('SibSp', Int64),
        ('Parch', Int64),
        ('Ticket', String),
        ('Fare', Float64),
        ('Cabin', String),
        ('Embarked', String)])

In [10]:
# Same attribute, this time on the LazyFrame.
# NOTE(review): the stored output starts with an echo of this source line,
# which looks like the tail of a warning -- presumably Polars discourages
# `.schema` on a LazyFrame in favour of collect_schema(); confirm.
df_lazy.schema

  df_lazy.schema


Schema([('PassengerId', Int64),
        ('Survived', Int64),
        ('Pclass', Int64),
        ('Name', String),
        ('Sex', String),
        ('Age', Float64),
        ('SibSp', Int64),
        ('Parch', Int64),
        ('Ticket', String),
        ('Fare', Float64),
        ('Cabin', String),
        ('Embarked', String)])

In [11]:
# Resolve the LazyFrame's schema through the explicit collect_schema() call.
df_lazy.collect_schema()

Schema([('PassengerId', Int64),
        ('Survived', Int64),
        ('Pclass', Int64),
        ('Name', String),
        ('Sex', String),
        ('Age', Float64),
        ('SibSp', Int64),
        ('Parch', Int64),
        ('Ticket', String),
        ('Fare', Float64),
        ('Cabin', String),
        ('Embarked', String)])

In [12]:
# Column names read straight off the LazyFrame attribute.
# NOTE(review): the stored output starts with an echo of this source line,
# which looks like the tail of a warning -- presumably Polars prefers
# collect_schema().names() for a LazyFrame; confirm.
df_lazy.columns

  df_lazy.columns


['PassengerId',
 'Survived',
 'Pclass',
 'Name',
 'Sex',
 'Age',
 'SibSp',
 'Parch',
 'Ticket',
 'Fare',
 'Cabin',
 'Embarked']

In [13]:
# Column names obtained from the resolved schema rather than from `.columns`.
df_lazy.collect_schema().names()

['PassengerId',
 'Survived',
 'Pclass',
 'Name',
 'Sex',
 'Age',
 'SibSp',
 'Parch',
 'Ticket',
 'Fare',
 'Cabin',
 'Embarked']

In [14]:
# Count the rows of the lazy frame: select pl.len() and collect the
# single-value result.
df_lazy.select(pl.len()).collect()

len
u32
891


In [15]:
# A LazyFrame can be built directly from in-memory data.
pl.LazyFrame({"values": [0, 1, 2]})

In [16]:
# Alternative route: build an eager DataFrame, then switch it to lazy mode
# with .lazy().
pl.DataFrame({"values": [0, 1, 2]}).lazy()

In [17]:
# Eager rename: "PassengerId" becomes "Id"; preview two rows of the result.
df_eager.rename({"PassengerId": "Id"}).head(2)

Id,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str
1,0,3,"""Braund, Mr. Owen Harris""","""male""",22.0,1,0,"""A/5 21171""",7.25,,"""S"""
2,1,1,"""Cumings, Mrs. John Bradley (Fl…","""female""",38.0,1,0,"""PC 17599""",71.2833,"""C85""","""C"""


In [18]:
# The same rename on the lazy frame. There is no collect() here, so the cell
# displays the resulting LazyFrame rather than any rows.
df_lazy.rename({"PassengerId": "Id"})

In [19]:
# Chaining: scan and rename in one expression, without binding any
# intermediate name.
pl.scan_csv(csv_file).rename({"PassengerId": "Id"})

In [20]:
# Re-assigning
# Alternative to chaining: bind the scan to a name, then rebind that same name
# to the renamed LazyFrame. After this cell df_lazy refers to the frame whose
# first column is called "Id" instead of "PassengerId".
df_lazy = pl.scan_csv(csv_file)
df_lazy = df_lazy.rename({"PassengerId": "Id"})

In [21]:
# Print the query plan that Polars reports for a bare CSV scan.
print(pl.scan_csv(csv_file).explain())

Csv SCAN [data/titanic.csv] [id: 6518788457104]
PROJECT */12 COLUMNS
