# Selecting columns

In [1]:
import polars as pl
import polars.selectors as cs

In [2]:
# Relative path to the Titanic CSV that the cells below load.
csvFile = "data/titanic.csv"

In [3]:
# Read the whole CSV into an eager DataFrame, then preview its first three rows.
df = pl.read_csv(csvFile)
df.head(3)

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str
1,0,3,"""Braund, Mr. Owen Harris""","""male""",22.0,1,0,"""A/5 21171""",7.25,,"""S"""
2,1,1,"""Cumings, Mrs. John Bradley (Fl…","""female""",38.0,1,0,"""PC 17599""",71.2833,"""C85""","""C"""
3,1,3,"""Heikkinen, Miss. Laina""","""female""",26.0,0,0,"""STON/O2. 3101282""",7.925,,"""S"""


In [4]:
# Square brackets with a single column name pull out that one column.
df["Age"].head(3)

Age
f64
22.0
38.0
26.0


In [5]:
# Check what a single-name lookup returns: a polars Series, not a DataFrame.
type(df["Age"].head(3))

polars.series.series.Series

In [6]:
# Square brackets with a list of names keep only those columns.
df[["Survived", "Age"]].head(3)

Survived,Age
i64,f64
0,22.0
1,38.0
1,26.0


In [7]:
# Check what a list-of-names lookup returns: a DataFrame.
type(df[["Survived", "Age"]].head(3))

polars.dataframe.frame.DataFrame

In [8]:
# [row, column] indexing: the value in row 0 of the "Age" column.
df[0, "Age"]

22.0

In [9]:
# The same lookup with the (row, column) pair written as an explicit tuple;
# the recorded result is identical.
df[(0, "Age")]

22.0

In [10]:
# Lists on both axes: rows 0 and 1, restricted to the Age and Fare columns.
df[[0, 1], ["Age", "Fare"]]

Age,Fare
f64,f64
22.0,7.25
38.0,71.2833


In [11]:
# All rows, columns at positions 1 through 5 (the stop position 6 is excluded).
df[:, 1:6].head(3)

Survived,Pclass,Name,Sex,Age
i64,i64,str,str,f64
0,3,"""Braund, Mr. Owen Harris""","""male""",22.0
1,1,"""Cumings, Mrs. John Bradley (Fl…","""female""",38.0
1,3,"""Heikkinen, Miss. Laina""","""female""",26.0


In [12]:
# First two rows; a slice over column names includes both endpoints,
# so this covers "Survived" through "Sex".
df[:2, "Survived": "Sex"]

Survived,Pclass,Name,Sex
i64,i64,str,str
0,3,"""Braund, Mr. Owen Harris""","""male"""
1,1,"""Cumings, Mrs. John Bradley (Fl…","""female"""


In [13]:
# select() with one column name: the result holds just that column.
df.select("Age").head(3)

Age
f64
22.0
38.0
26.0


In [14]:
# Select the single column, then convert the selection with to_series().
df.select("Age").to_series().head(3)

Age
f64
22.0
38.0
26.0


In [15]:
# select() with a list of names keeps those columns, in the order listed.
df.select(["Survived", "Age"]).head(3)

Survived,Age
i64,f64
0,22.0
1,38.0
1,26.0


In [16]:
# The same single-column selection written as an expression:
# pl.col("Age") refers to the column by name.
df.select(pl.col("Age")).head(3)

Age
f64
22.0
38.0
26.0


# Selecting and transforming a column with an expression

In [17]:
# An expression can transform the column while selecting it: round Fare to
# zero decimal places. The output column is still called "Fare".
df.select(pl.col("Fare").round(0)).head(3)

Fare
f64
7.0
71.0
8.0


In [18]:
# Show the original Fare next to its rounded version; alias() gives the
# rounded column its own name ("roundedFare").
df.select(
    [pl.col("Fare"), pl.col("Fare").round(0).alias("roundedFare")]
).head(3)

Fare,roundedFare
f64,f64
7.25,7.0
71.2833,71.0
7.925,8.0


In [19]:
# Build the query lazily with scan_csv() and select two columns. The plan
# printed below shows the projection applied at the CSV scan (2 of 12 columns).
# The lazy query gets its own name so the eager `df` loaded earlier is not
# silently replaced by a LazyFrame.
lazy_query = (
    pl.scan_csv(csvFile)
    .select(["Survived", "Age"])
)
# explain() returns the query plan as text; print() renders its line breaks.
print(lazy_query.explain())

Csv SCAN [data/titanic.csv] [id: 4962735234784]
PROJECT 2/12 COLUMNS


In [20]:
# Same path as assigned near the top of the notebook.
# NOTE(review): presumably repeated so this part can be run on its own — confirm.
csvFile = "data/titanic.csv"

In [21]:
# Load the full CSV into an eager DataFrame again and preview the first three rows.
df = pl.read_csv(csvFile)
df.head(3)

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str
1,0,3,"""Braund, Mr. Owen Harris""","""male""",22.0,1,0,"""A/5 21171""",7.25,,"""S"""
2,1,1,"""Cumings, Mrs. John Bradley (Fl…","""female""",38.0,1,0,"""PC 17599""",71.2833,"""C85""","""C"""
3,1,3,"""Heikkinen, Miss. Laina""","""female""",26.0,0,0,"""STON/O2. 3101282""",7.925,,"""S"""


In [22]:
# pl.all() selects every column of the frame.
df.select(pl.all()).head(3)

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str
1,0,3,"""Braund, Mr. Owen Harris""","""male""",22.0,1,0,"""A/5 21171""",7.25,,"""S"""
2,1,1,"""Cumings, Mrs. John Bradley (Fl…","""female""",38.0,1,0,"""PC 17599""",71.2833,"""C85""","""C"""
3,1,3,"""Heikkinen, Miss. Laina""","""female""",26.0,0,0,"""STON/O2. 3101282""",7.925,,"""S"""


In [23]:
# pl.exclude() selects every column except the ones named.
df.select(pl.exclude(["PassengerId", "Survived", "Pclass"])).head(3)

Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
str,str,f64,i64,i64,str,f64,str,str
"""Braund, Mr. Owen Harris""","""male""",22.0,1,0,"""A/5 21171""",7.25,,"""S"""
"""Cumings, Mrs. John Bradley (Fl…","""female""",38.0,1,0,"""PC 17599""",71.2833,"""C85""","""C"""
"""Heikkinen, Miss. Laina""","""female""",26.0,0,0,"""STON/O2. 3101282""",7.925,,"""S"""


In [24]:
# A name wrapped in ^...$ is treated as a regular expression over column names;
# this pattern matches every column whose name starts with "P".
df.select("^P.*$").head(3)

PassengerId,Pclass,Parch
i64,i64,i64
1,3,0
2,1,0
3,3,0


In [25]:
(
    df
    .select(
        # Select by dtype instead of by name: every string column.
        # pl.String is the current name of the string dtype; pl.Utf8 is its
        # older alias.
        pl.col(pl.String)
    )
    .head(3)
)

Name,Sex,Ticket,Cabin,Embarked
str,str,str,str,str
"""Braund, Mr. Owen Harris""","""male""","""A/5 21171""",,"""S"""
"""Cumings, Mrs. John Bradley (Fl…","""female""","""PC 17599""","""C85""","""C"""
"""Heikkinen, Miss. Laina""","""female""","""STON/O2. 3101282""",,"""S"""


In [26]:
# A list of dtypes selects the columns whose dtype matches any entry:
# here the 64-bit integer and 64-bit float columns.
df.select(pl.col([pl.Int64, pl.Float64])).head(3)

PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare
i64,i64,i64,f64,i64,i64,f64
1,0,3,22.0,1,0,7.25
2,1,1,38.0,1,0,71.2833
3,1,3,26.0,0,0,7.925


In [27]:
(
    df
    .select(
        # Select every numeric column. cs.numeric() is used instead of
        # pl.col(pl.NUMERIC_DTYPES): the NUMERIC_DTYPES constant is deprecated
        # and triggers a warning, while the selector picks the same columns.
        cs.numeric()
    )
    .head(3)
)

  pl.col(pl.NUMERIC_DTYPES)


PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare
i64,i64,i64,f64,i64,i64,f64
1,0,3,22.0,1,0,7.25
2,1,1,38.0,1,0,71.2833
3,1,3,26.0,0,0,7.925


In [28]:
# Inspect the set of dtypes that polars groups as numeric.
# NOTE(review): the recorded output echoes this line before the value, which
# looks like a deprecation warning for pl.NUMERIC_DTYPES — confirm against the
# installed polars version.
pl.NUMERIC_DTYPES

  pl.NUMERIC_DTYPES


frozenset({Decimal,
           Float32,
           Float64,
           Int128,
           Int16,
           Int32,
           Int64,
           Int8,
           UInt16,
           UInt32,
           UInt64,
           UInt8})

In [29]:
# List the names reported by dir() for the polars module that contain
# "_DTYPES"; the recorded result is an empty list.
# NOTE(review): NUMERIC_DTYPES is absent here even though pl.NUMERIC_DTYPES
# resolved in the previous cell — presumably it is served through a
# module-level __getattr__ deprecation shim; confirm.
[name for name in dir(pl) if "_DTYPES" in name]

[]

In [30]:
# Selector counterpart of pl.all(): cs.all() matches every column.
df.select(cs.all()).head(3)

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str
1,0,3,"""Braund, Mr. Owen Harris""","""male""",22.0,1,0,"""A/5 21171""",7.25,,"""S"""
2,1,1,"""Cumings, Mrs. John Bradley (Fl…","""female""",38.0,1,0,"""PC 17599""",71.2833,"""C85""","""C"""
3,1,3,"""Heikkinen, Miss. Laina""","""female""",26.0,0,0,"""STON/O2. 3101282""",7.925,,"""S"""


In [31]:
# cs.first() matches the first column of the frame (PassengerId here).
df.select(cs.first()).head(3)

PassengerId
i64
1
2
3


In [32]:
# Expression methods can be chained onto a selector: take the maximum of
# every column, which yields a single-row frame.
df.select(cs.all().max())

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str
891,1,3,"""van Melkebeke, Mr. Philemon""","""male""",80.0,8,6,"""WE/P 5735""",512.3292,"""T""","""S"""


In [33]:
# cs.numeric() matches the integer and float columns of this frame.
df.select(cs.numeric()).head(3)

PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare
i64,i64,i64,f64,i64,i64,f64
1,0,3,22.0,1,0,7.25
2,1,1,38.0,1,0,71.2833
3,1,3,26.0,0,0,7.925


In [34]:
# ~ inverts a selector: every column except Pclass and Age.
df.select(~cs.by_name("Pclass", "Age")).head(3)

PassengerId,Survived,Name,Sex,SibSp,Parch,Ticket,Fare,Cabin,Embarked
i64,i64,str,str,i64,i64,str,f64,str,str
1,0,"""Braund, Mr. Owen Harris""","""male""",1,0,"""A/5 21171""",7.25,,"""S"""
2,1,"""Cumings, Mrs. John Bradley (Fl…","""female""",1,0,"""PC 17599""",71.2833,"""C85""","""C"""
3,1,"""Heikkinen, Miss. Laina""","""female""",0,0,"""STON/O2. 3101282""",7.925,,"""S"""


In [35]:
# Columns whose name begins with "P".
df.select(cs.starts_with("P")).head(3)

PassengerId,Pclass,Parch
i64,i64,i64
1,3,0
2,1,0
3,3,0


In [36]:
# starts_with() accepts several prefixes; only the resulting column names are
# shown, and they come back in the frame's own column order.
df.select(cs.starts_with("P", "A")).head(3).columns

['PassengerId', 'Pclass', 'Age', 'Parch']

In [37]:
# cs.matches() takes a regular expression: columns whose name matches
# "Age" or "Fare".
df.select(cs.matches("Age|Fare")).head(3)

Age,Fare
f64,f64
22.0,7.25
38.0,71.2833
26.0,7.925


In [38]:
# & intersects selectors: numeric columns whose name also contains "A"
# (only Age qualifies in this frame).
df.select(cs.numeric() & cs.contains("A")).head(3)

Age
f64
22.0
38.0
26.0


In [39]:
# | unions selectors: string columns plus any column whose name contains "P".
df.select(cs.string() | cs.contains("P")).head(3)

PassengerId,Pclass,Name,Sex,Parch,Ticket,Cabin,Embarked
i64,i64,str,str,i64,str,str,str
1,3,"""Braund, Mr. Owen Harris""","""male""",0,"""A/5 21171""",,"""S"""
2,1,"""Cumings, Mrs. John Bradley (Fl…","""female""",0,"""PC 17599""","""C85""","""C"""
3,3,"""Heikkinen, Miss. Laina""","""female""",0,"""STON/O2. 3101282""",,"""S"""


In [41]:
# - subtracts one selector from another: string columns except those whose
# name starts with "T" (this drops Ticket).
df.select(cs.string() - cs.starts_with("T")).head(3)

Name,Sex,Cabin,Embarked
str,str,str,str
"""Braund, Mr. Owen Harris""","""male""",,"""S"""
"""Cumings, Mrs. John Bradley (Fl…","""female""","""C85""","""C"""
"""Heikkinen, Miss. Laina""","""female""",,"""S"""


In [44]:
# Load the CSV into an eager DataFrame for the with_columns() examples that follow.
df = pl.read_csv(csvFile)

In [46]:
# with_columns() keeps every column. The expression's output is still named
# "Fare", so the rounded values take the place of the existing Fare column.
df.with_columns(pl.col("Fare").round(0)).head(3)

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str
1,0,3,"""Braund, Mr. Owen Harris""","""male""",22.0,1,0,"""A/5 21171""",7.0,,"""S"""
2,1,1,"""Cumings, Mrs. John Bradley (Fl…","""female""",38.0,1,0,"""PC 17599""",71.0,"""C85""","""C"""
3,1,3,"""Heikkinen, Miss. Laina""","""female""",26.0,0,0,"""STON/O2. 3101282""",8.0,,"""S"""


In [47]:
# `df` is reused as loaded above: with_columns() returns a new DataFrame and
# does not modify `df`, so re-reading the CSV here is unnecessary.
(
    df
    .with_columns(
        # alias() names the rounded result "roundFare", so it is appended as a
        # new column and the original Fare column is left untouched.
        pl.col("Fare").round(0).alias("roundFare")
    )
    .head(3)
)

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,roundFare
i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str,f64
1,0,3,"""Braund, Mr. Owen Harris""","""male""",22.0,1,0,"""A/5 21171""",7.25,,"""S""",7.0
2,1,1,"""Cumings, Mrs. John Bradley (Fl…","""female""",38.0,1,0,"""PC 17599""",71.2833,"""C85""","""C""",71.0
3,1,3,"""Heikkinen, Miss. Laina""","""female""",26.0,0,0,"""STON/O2. 3101282""",7.925,,"""S""",8.0


In [49]:
# `df` is reused as loaded above: with_columns() returns a new DataFrame and
# does not modify `df`, so re-reading the CSV here is unnecessary.
(
    df
    .with_columns(
        # Arithmetic on an expression: the parentheses make alias() apply to
        # the whole product, which is appended as "doubleFare".
        (pl.col("Fare") * 2).alias("doubleFare")
    )
    .head(3)
)

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,doubleFare
i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str,f64
1,0,3,"""Braund, Mr. Owen Harris""","""male""",22.0,1,0,"""A/5 21171""",7.25,,"""S""",14.5
2,1,1,"""Cumings, Mrs. John Bradley (Fl…","""female""",38.0,1,0,"""PC 17599""",71.2833,"""C85""","""C""",142.5666
3,1,3,"""Heikkinen, Miss. Laina""","""female""",26.0,0,0,"""STON/O2. 3101282""",7.925,,"""S""",15.85


In [50]:
# `df` is reused as loaded above: with_columns() returns a new DataFrame and
# does not modify `df`, so re-reading the CSV here is unnecessary.
(
    df
    .with_columns(
        # Combine two columns row by row; the sum is appended as "farePlusAge".
        (pl.col("Fare") + pl.col("Age")).alias("farePlusAge")
    )
    .head(3)
)

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,farePlusAge
i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str,f64
1,0,3,"""Braund, Mr. Owen Harris""","""male""",22.0,1,0,"""A/5 21171""",7.25,,"""S""",29.25
2,1,1,"""Cumings, Mrs. John Bradley (Fl…","""female""",38.0,1,0,"""PC 17599""",71.2833,"""C85""","""C""",109.2833
3,1,3,"""Heikkinen, Miss. Laina""","""female""",26.0,0,0,"""STON/O2. 3101282""",7.925,,"""S""",33.925


In [51]:
# `df` is reused as loaded above: with_columns() returns a new DataFrame and
# does not modify `df`, so re-reading the CSV here is unnecessary.
(
    df
    .with_columns(
        # pl.lit() turns a Python value into an expression; the constant "yes"
        # is repeated on every row of the new "Aboard" column.
        pl.lit("yes").alias("Aboard")
    )
    # Keep only the two columns needed to show the result.
    .select(["Name", "Aboard"])
    .head(3)
)

Name,Aboard
str,str
"""Braund, Mr. Owen Harris""","""yes"""
"""Cumings, Mrs. John Bradley (Fl…","""yes"""
"""Heikkinen, Miss. Laina""","""yes"""
