https://towardsdatascience.com/when-to-use-pandas-transform-function-df8861aa0dcf

# Tranform Each Column

In [38]:
import pandas as pd
import numpy as np
df = pd.DataFrame({'A': [1,2,3], 'B': [10,20,30] })
def plus_10(x):
    return x+10
df.transform(plus_10)

Unnamed: 0,A,B
0,11,20
1,12,30
2,13,40


In [39]:
df.transform('sqrt')

Unnamed: 0,A,B
0,1.0,3.162278
1,1.414214,4.472136
2,1.732051,5.477226


In [40]:
df.transform([np.sqrt, np.exp])

Unnamed: 0_level_0,A,A,B,B
Unnamed: 0_level_1,sqrt,exp,sqrt,exp
0,1.0,2.718282,3.162278,22026.47
1,1.414214,7.389056,4.472136,485165200.0
2,1.732051,20.085537,5.477226,10686470000000.0


In [41]:
df.transform({
    'A': np.sqrt,
    'B': np.exp,
})

Unnamed: 0,A,B
0,1.0,22026.47
1,1.414214,485165200.0
2,1.732051,10686470000000.0


# Combining Groupby Results

In [42]:
df = pd.DataFrame({
  'restaurant_id': [101,102,103,104,105,106,107],
  'address': ['A','B','C','D', 'E', 'F', 'G'],
  'city': ['London','London','London','Oxford','Oxford', 'Durham', 'Durham'],
  'sales': [10,500,48,12,21,22,14]
})
display(df)

Unnamed: 0,restaurant_id,address,city,sales
0,101,A,London,10
1,102,B,London,500
2,103,C,London,48
3,104,D,Oxford,12
4,105,E,Oxford,21
5,106,F,Durham,22
6,107,G,Durham,14


## Classic Way

In [43]:
city_sales = df.groupby('city')['sales'].apply(sum).rename('city_total_sales').reset_index()
print(type(city_sales))

<class 'pandas.core.frame.DataFrame'>


  city_sales = df.groupby('city')['sales'].apply(sum).rename('city_total_sales').reset_index()


In [44]:
print(type(df.groupby('city')['sales']))
list(df.groupby('city')['sales'].apply(sum).rename('city_total_sales').reset_index())

<class 'pandas.core.groupby.generic.SeriesGroupBy'>


  list(df.groupby('city')['sales'].apply(sum).rename('city_total_sales').reset_index())


['city', 'city_total_sales']

In [45]:
print(type(city_sales))
city_sales

<class 'pandas.core.frame.DataFrame'>


Unnamed: 0,city,city_total_sales
0,Durham,36
1,London,558
2,Oxford,33


In [46]:
df_new = pd.merge(df, city_sales, how='left')

In [47]:
display(df_new)

Unnamed: 0,restaurant_id,address,city,sales,city_total_sales
0,101,A,London,10,558
1,102,B,London,500,558
2,103,C,London,48,558
3,104,D,Oxford,12,33
4,105,E,Oxford,21,33
5,106,F,Durham,22,36
6,107,G,Durham,14,36


In [48]:
df_new['pct'] = df_new['sales'] / df_new['city_total_sales']

In [49]:
display(df_new)

Unnamed: 0,restaurant_id,address,city,sales,city_total_sales,pct
0,101,A,London,10,558,0.017921
1,102,B,London,500,558,0.896057
2,103,C,London,48,558,0.086022
3,104,D,Oxford,12,33,0.363636
4,105,E,Oxford,21,33,0.636364
5,106,F,Durham,22,36,0.611111
6,107,G,Durham,14,36,0.388889


The transform function retains the same number of items as the original dataset after performing the transformation. Therefore, a one-line step using groupby followed by a transform(sum) returns the same output

## Transform Approach

In [50]:
df['city_total_sales'] = df.groupby('city')['sales'].transform('sum')
display(df)

Unnamed: 0,restaurant_id,address,city,sales,city_total_sales
0,101,A,London,10,558
1,102,B,London,500,558
2,103,C,London,48,558
3,104,D,Oxford,12,33
4,105,E,Oxford,21,33
5,106,F,Durham,22,36
6,107,G,Durham,14,36


In [51]:
df['pct'] = df['sales'] / df['city_total_sales']
df['pct'] = df['pct'].apply(lambda x: format(x, '.2%'))
display(df)

Unnamed: 0,restaurant_id,address,city,sales,city_total_sales,pct
0,101,A,London,10,558,1.79%
1,102,B,London,500,558,89.61%
2,103,C,London,48,558,8.60%
3,104,D,Oxford,12,33,36.36%
4,105,E,Oxford,21,33,63.64%
5,106,F,Durham,22,36,61.11%
6,107,G,Durham,14,36,38.89%


# Filtering

In [52]:
df[df.groupby('city')['sales'].transform('sum') > 40]

Unnamed: 0,restaurant_id,address,city,sales,city_total_sales,pct
0,101,A,London,10,558,1.79%
1,102,B,London,500,558,89.61%
2,103,C,London,48,558,8.60%


In [53]:
df = pd.DataFrame({
    'name': ['A', 'A', 'B', 'B', 'B', 'C', 'C', 'C'],
    'value': [1, np.nan, np.nan, 2, 8, 2, np.nan, 3]
})

In [54]:
df.groupby('name')['value'].mean()

name
A    1.0
B    5.0
C    2.5
Name: value, dtype: float64

In [56]:
df['value'] = df.groupby('name').transform(lambda x: x.fillna(x.mean()))

In [57]:
display(df)

Unnamed: 0,name,value
0,A,1.0
1,A,1.0
2,B,5.0
3,B,2.0
4,B,8.0
5,C,2.0
6,C,2.5
7,C,3.0


In [60]:
import pandas as pd

# create a sample DataFrame
df = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
                   'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
                   'C': [1, 2, 3, 4, 5, 6, 7, 8],
                   'D': [9, 10, 11, 12, 13, 14, 15, 16]})

# group the DataFrame by column A
grouped = df.groupby('A')

# view the groups of the GroupBy object
print(grouped.groups)

{'bar': [1, 3, 5], 'foo': [0, 2, 4, 6, 7]}
