In [11]:
import pandas as pd
import numpy as np

def winsorize(df, quantiles):
    """
    Winsorize the data in the DataFrame, one column at a time.

    Values below the lower quantile are raised to it and values above the
    upper quantile are lowered to it. The bounds are computed separately
    for every column, so columns on different scales do not share a single
    global bound, and missing values are ignored when computing them
    (they stay missing in the result).

    df: pd.DataFrame
        Numeric data to winsorize; it is not modified. A pd.Series is
        accepted as well and is treated as a single column.
    quantiles: list
        Lower and upper quantile, in that order.
        ex: [0.05, 0.95]

    Returns a new object of the same type, index and columns as df.
    Raises ValueError if quantiles does not contain exactly two values.
    """
    lower_q, upper_q = quantiles

    # One row per requested quantile; for a DataFrame, one column per
    # column of df (for a Series, the two rows are plain scalars).
    bounds = df.quantile([lower_q, upper_q])
    lower, upper = bounds.iloc[0], bounds.iloc[1]

    if df.ndim == 1:
        # Series input: scalar bounds, no column axis to align on.
        return df.clip(lower=lower, upper=upper)

    # axis=1 aligns the per-column bounds with df's column labels.
    return df.clip(lower=lower, upper=upper, axis=1)

In [12]:
# Ten consecutive integers, winsorized at the 20th / 80th percentiles.
df = pd.DataFrame({'sequence': range(1, 11)})
print(
    winsorize(df, [0.20, 0.80])
    .to_markdown()
)

|    |   sequence |
|---:|-----------:|
|  0 |        2.8 |
|  1 |        2.8 |
|  2 |        3   |
|  3 |        4   |
|  4 |        5   |
|  5 |        6   |
|  6 |        7   |
|  7 |        8   |
|  8 |        8.2 |
|  9 |        8.2 |


In [13]:
def group_winsorize(df, group_column, value_column, quantiles=(0.05, 0.95)):
    """
    Apply winsorizing to each group in the DataFrame.

    The rows of df are split by the values of group_column, and
    value_column is winsorized within each group using that group's own
    quantile bounds.

    df: pd.DataFrame
    group_column: str
        Column whose values define the groups.
    value_column: str
        Column to winsorize.
    quantiles: sequence of two floats, default (0.05, 0.95)
        Lower and upper quantile, passed through to winsorize.

    Returns a pd.DataFrame containing only value_column, with a fresh
    0..n-1 index (the original index and the group labels are dropped).
    """
    # Select the value column *before* apply so the applied function never
    # receives the grouping column. Applying over the grouping column is
    # deprecated in pandas and triggers a warning on every call.
    per_group = df.groupby(group_column)[[value_column]].apply(
        lambda group: winsorize(group, quantiles)
    )
    return per_group.reset_index(drop=True)

# Build the example frame: group labels 1.0 through 5.0, ten rows each,
# next to the running sequence 1..50.
groups = np.repeat(np.arange(1.0, 6.0), 10)
df = pd.DataFrame({"group": groups, "sequence": range(1, 51)})

# Winsorize every group separately, using the default quantiles
result = group_winsorize(df, 'group', 'sequence')

# Print the first 11 rows of the result as a markdown table
print(
    result
    .head(11)
    .to_markdown()
)

|    |   sequence |
|---:|-----------:|
|  0 |       1.45 |
|  1 |       2    |
|  2 |       3    |
|  3 |       4    |
|  4 |       5    |
|  5 |       6    |
|  6 |       7    |
|  7 |       8    |
|  8 |       9    |
|  9 |       9.55 |
| 10 |      11.45 |


  return df.groupby(group_column).apply(lambda x: winsorize(x[[value_column]], quantiles)).reset_index(drop=True)
