In [1]:
import polars as pl
from polars.testing.parametric import dataframes, column

In [2]:
pl.Config.set_tbl_rows(100)

Book = pl.Enum("XYZ")

def generate(size=20):
    return dataframes(
    [
        column("id", dtype=pl.UInt16, unique=True, allow_null=False, ), 
        column("value", dtype=pl.UInt64, allow_null=False), 
        column("category", dtype = Book, allow_null=False)
    ], 
    min_size=size, max_size=size)

original = generate().example()

In [3]:
original

id,value,category
u16,u64,enum
4067,235,"""Y"""
41792,14859,"""Z"""
57974,3528471696,"""Z"""
11932,6886,"""Y"""
1335,2156407849,"""Y"""
52879,17579990059705017543,"""X"""
26422,22308,"""Z"""
12369,3754081059,"""Z"""
35845,1466847458764306160,"""X"""
965,4095959279508537201,"""Y"""


In [4]:
def mutate(df, k,j):
    return df.with_columns(
        category = pl.when(pl.col.category == k).then(pl.lit(j).cast(Book)).otherwise("category"),
    )

In [5]:
new = pl.concat([mutate(original, "X","Z").head(18), generate(3).example()])

In [6]:
new

id,value,category
u16,u64,enum
4067,235,"""Y"""
41792,14859,"""Z"""
57974,3528471696,"""Z"""
11932,6886,"""Y"""
1335,2156407849,"""Y"""
52879,17579990059705017543,"""Z"""
26422,22308,"""Z"""
12369,3754081059,"""Z"""
35845,1466847458764306160,"""Z"""
965,4095959279508537201,"""Y"""


In [7]:
original.group_by("category").agg(pl.sum("value"))

category,value
enum,u64
"""Y""",1520745072658032387
"""Z""",11323287274
"""X""",11583240869068713019


In [8]:
new.group_by("category").agg(pl.sum("value"))

category,value
enum,u64
"""Y""",1520745072658081379
"""Z""",600093456083183415
"""X""",24840


In [9]:
full = original.join(new, on="id", how="full")

In [10]:
full

id,value,category,id_right,value_right,category_right
u16,u64,enum,u16,u64,enum
4067.0,235,"""Y""",4067.0,235,"""Y"""
41792.0,14859,"""Z""",41792.0,14859,"""Z"""
57974.0,3528471696,"""Z""",57974.0,3528471696,"""Z"""
11932.0,6886,"""Y""",11932.0,6886,"""Y"""
1335.0,2156407849,"""Y""",1335.0,2156407849,"""Y"""
52879.0,17579990059705017543,"""X""",52879.0,17579990059705017543,"""Z"""
26422.0,22308,"""Z""",26422.0,22308,"""Z"""
12369.0,3754081059,"""Z""",12369.0,3754081059,"""Z"""
35845.0,1466847458764306160,"""X""",35845.0,1466847458764306160,"""Z"""
965.0,4095959279508537201,"""Y""",965.0,4095959279508537201,"""Y"""


In [11]:
full.group_by(pl.col.category, pl.col.category_right).agg(pl.sum("value"), pl.sum("value_right"))

category,category_right,value,value_right
enum,enum,u64,u64
"""Z""","""Z""",11323285349,11323285349
"""X""","""Z""",600093444759862515,600093444759862515
"""Y""","""Y""",1520745072658032387,1520745072658032387
,"""X""",0,24840
"""X""",,10983147424308850504,0
"""Z""",,1925,0
,"""Z""",0,35551
,"""Y""",0,48992


In [12]:
from polars.testing import assert_frame_equal

assert_frame_equal(
    full.group_by(pl.col.category, pl.col.category_right).agg(pl.sum("value"), pl.sum("value_right"))
    .group_by(pl.col.category).agg(pl.sum("value")).drop_nulls().sort("category"),
    original.group_by(pl.col.category).agg(pl.sum("value")).sort("category")
)

In [13]:
original.group_by("category").agg(pl.sum("value"))

category,value
enum,u64
"""Y""",1520745072658032387
"""X""",11583240869068713019
"""Z""",11323287274


In [14]:
new.group_by("category").agg(pl.sum("value"))

category,value
enum,u64
"""X""",24840
"""Y""",1520745072658081379
"""Z""",600093456083183415
