# Missing values

In [1]:
import polars as pl
import polars.selectors as cs

In [2]:
# Toy frame for the examples below: one null in the integer column,
# two nulls in the string column.
df = pl.DataFrame({
    "col1": [None, 2, 3, 4],
    "col2": [None, None, "a", "b"],
})
df

col1,col2
i64,str
,
2.0,
3.0,"""a"""
4.0,"""b"""


In [3]:
# Count the nulls in every column; the result is a one-row frame.
df.null_count()

col1,col2
u32,u32
1,2


In [4]:
# Put col1 side by side with boolean masks marking its null and
# non-null entries. Keyword arguments name the derived columns.
df.select(
    pl.col("col1"),
    is_null=pl.col("col1").is_null(),
    is_not_null=pl.col("col1").is_not_null(),
)

col1,is_null,is_not_null
i64,bool,bool
,True,False
2.0,False,True
3.0,False,True
4.0,False,True


In [5]:
# Keep only the rows where col1 holds a value.
df.filter(pl.col("col1").is_not_null())

col1,col2
i64,str
2,
3,"""a"""
4,"""b"""


In [6]:
# Keep rows where at least one column is non-null, i.e. drop only the
# rows that are null across the board.
df.filter(pl.any_horizontal(pl.all().is_not_null()))

col1,col2
i64,str
2,
3,"""a"""
4,"""b"""


In [7]:
# Keep rows where every column is non-null.
df.filter(pl.all_horizontal(pl.all().is_not_null()))

col1,col2
i64,str
3,"""a"""
4,"""b"""


In [8]:
# Drop every row that contains a null in any column.
df.drop_nulls()

col1,col2
i64,str
3,"""a"""
4,"""b"""


In [9]:
# Only nulls in col1 cause a row to be dropped; nulls in other columns
# are left in place.
df.drop_nulls(subset=["col1"])

col1,col2
i64,str
2,
3,"""a"""
4,"""b"""


In [10]:
# New frame for the fill_null examples: two integer columns and one
# string column, each with at least one null.
df = pl.DataFrame({
    "col1": [0, None, 2, 3],
    "col2": [0, None, None, 3],
    "col3": ["a", None, "c", "d"],
})
df

col1,col2,col3
i64,i64,str
0.0,0.0,"""a"""
,,
2.0,,"""c"""
3.0,3.0,"""d"""


In [13]:
# Fill nulls in every column with the literal 0 and add the results as
# "<name>_new" columns. Note in the output that the string column
# receives the string "0".
df.with_columns(pl.all().fill_null(0).name.suffix("_new"))

col1,col2,col3,col1_new,col2_new,col3_new
i64,i64,str,i64,i64,str
0.0,0.0,"""a""",0,0,"""a"""
,,,0,0,"""0"""
2.0,,"""c""",2,0,"""c"""
3.0,3.0,"""d""",3,3,"""d"""


In [14]:
# Fill nulls in every column with a string literal. Per the displayed
# schema, the integer columns come back as str in their "_new" copies.
df.with_columns(pl.all().fill_null("missing").name.suffix("_new"))

col1,col2,col3,col1_new,col2_new,col3_new
i64,i64,str,str,str,str
0.0,0.0,"""a""","""0""","""0""","""a"""
,,,"""missing""","""missing""","""missing"""
2.0,,"""c""","""2""","""missing""","""c"""
3.0,3.0,"""d""","""3""","""3""","""d"""


In [15]:
# Forward fill: each null takes the most recent non-null value above it
# in the same column.
df.with_columns(pl.all().fill_null(strategy="forward").name.suffix("_new"))

col1,col2,col3,col1_new,col2_new,col3_new
i64,i64,str,i64,i64,str
0.0,0.0,"""a""",0,0,"""a"""
,,,0,0,"""a"""
2.0,,"""c""",2,0,"""c"""
3.0,3.0,"""d""",3,3,"""d"""


In [16]:
# Frame with interleaved groups, used to show a per-group forward fill.
df = pl.DataFrame({
    "group": ["A", "B", "A", "B", "A", "B"],
    "col1": [0, 1, None, 1, 2, None],
})
df

group,col1
str,i64
"""A""",0.0
"""B""",1.0
"""A""",
"""B""",1.0
"""A""",2.0
"""B""",


In [17]:
# Forward fill within each group: a null is replaced by the previous
# non-null value from a row of the same group, not simply the row above
# (the null in group "A" becomes 0 even though the row above holds 1).
df.with_columns(
    pl.col("col1")
    .fill_null(strategy="forward")
    .over("group")
    .name.suffix("_filled")
)

group,col1,col1_filled
str,i64,i64
"""A""",0.0,0
"""B""",1.0,1
"""A""",,0
"""B""",1.0,1
"""A""",2.0,2
"""B""",,1
