[![Test In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/vanderschaarlab/temporai/blob/main/tutorials/usage/tutorial03_scaling.ipynb)

# User Guide Tutorial 03: Preprocessing › Scaling

This tutorial shows how to use the TemporAI `preprocessing.scaling` plugins.

## All `preprocessing.scaling` plugins

To see all the relevant plugins:

In [None]:
from tempor import plugin_loader

# List the available plugins, then narrow down to the preprocessing -> scaling category.
available_plugins = plugin_loader.list()
available_plugins["preprocessing"]["scaling"]

{'static': ['static_minmax_scaler', 'static_standard_scaler'],
 'temporal': ['ts_minmax_scaler', 'ts_standard_scaler']}

Next, load the data source class that the examples below use:

In [None]:
# Look up the sine data source class by its plugin name; it is instantiated in later cells.
SineDataSource = plugin_loader.get_class(
    "prediction.one_off.sine",
    plugin_type="datasource",
)

## Using a static data scaling plugin

In [None]:
from tempor import plugin_loader

# Load a sine dataset with `static_scale=5.0`, so the static features span a
# range other than [0, 1] and the effect of scaling is visible.
dataset = SineDataSource(static_scale=5.0, random_state=42).load()
print(dataset)

# Instantiate the static min-max scaler with its default parameters; the printed
# repr lists the parameters the plugin actually uses (`feature_range`, `clip`).
model = plugin_loader.get("preprocessing.scaling.static.static_minmax_scaler")
print(model)

OneOffPredictionDataset(
    time_series=TimeSeriesSamples([100, *, 5]),
    static=StaticSamples([100, 4]),
    predictive=OneOffPredictionTaskData(targets=StaticSamples([100, 1]))
)
StaticMinMaxScaler(
    name='static_minmax_scaler',
    category='preprocessing.scaling.static',
    plugin_type='method',
    params={'feature_range': [0, 1], 'clip': False}
)


In [None]:
# Inspect the static features before scaling: take note of their value range.

from IPython.display import display

print("Min, max values per feature:")
static_stats = dataset.static.dataframe().describe().T  # type: ignore
display(static_stats.loc[:, ["min", "max"]])

dataset.static

Min, max values per feature:


Unnamed: 0,min,max
0,0.025308,4.8181
1,0.045985,4.950269
2,0.102922,4.952526
3,0.082939,4.85891


Unnamed: 0_level_0,0,1,2,3
sample_idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,1.872701,4.753572,3.659970,2.993292
1,0.780093,0.779973,0.290418,4.330881
2,3.005575,3.540363,0.102922,4.849549
3,4.162213,1.061696,0.909125,0.917023
4,1.521211,2.623782,2.159725,1.456146
...,...,...,...,...
95,0.590824,3.483686,3.144714,4.387360
96,3.675355,4.017405,1.410173,0.887198
97,3.753074,4.034174,4.952526,2.063088
98,1.860090,3.882065,1.704018,4.653787


In [None]:
# Fit the scaler and apply it in a single step (alternatively, call fit() and
# then transform()), then take note of the new value range of the static features.

dataset = model.fit_transform(dataset)

print("Min, max values per feature:")
scaled_static_stats = dataset.static.dataframe().describe().T  # type: ignore
display(scaled_static_stats.loc[:, ["min", "max"]])

dataset.static

Min, max values per feature:


Unnamed: 0,min,max
0,0.0,1.0
1,0.0,1.0
2,0.0,1.0
3,0.0,1.0


Unnamed: 0_level_0,0,1,2,3
sample_idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,0.385452,0.959893,0.733472,0.609374
1,0.157483,0.149662,0.038662,0.889440
2,0.621823,0.712515,0.000000,0.998040
3,0.863151,0.207107,0.166241,0.174642
4,0.312115,0.525621,0.424118,0.287524
...,...,...,...,...
95,0.117993,0.700959,0.627225,0.901266
96,0.761570,0.809786,0.269558,0.168397
97,0.777786,0.813205,1.000000,0.414607
98,0.382821,0.782190,0.330150,0.957051


## Using a temporal data scaling plugin

In [None]:
from tempor import plugin_loader

# Load a fresh sine dataset, this time with `ts_scale=5.0` for the time series features.
sine_source = SineDataSource(ts_scale=5.0, random_state=42)
dataset = sine_source.load()
print(dataset)

# Look up the temporal standard scaler by its fully qualified plugin name.
scaler_name = "preprocessing.scaling.temporal.ts_standard_scaler"
model = plugin_loader.get(scaler_name)
print(model)

OneOffPredictionDataset(
    time_series=TimeSeriesSamples([100, *, 5]),
    static=StaticSamples([100, 4]),
    predictive=OneOffPredictionTaskData(targets=StaticSamples([100, 1]))
)
TimeSeriesStandardScaler(
    name='ts_standard_scaler',
    category='preprocessing.scaling.temporal',
    plugin_type='method',
    params={'with_mean': True, 'with_std': True}
)


In [None]:
# Inspect the time series features before scaling: take note of their value range.

from IPython.display import display

print("Min, max values per feature:")
ts_stats = dataset.time_series.dataframe().describe().T
display(ts_stats.loc[:, ["min", "max"]])

dataset.time_series

Min, max values per feature:


Unnamed: 0,min,max
0,-4.999519,4.999999
1,-4.999982,4.999999
2,-4.999923,4.999992
3,-4.999979,5.0
4,-4.99997,4.999928


Unnamed: 0_level_0,Unnamed: 1_level_0,0,1,2,3,4
sample_idx,time_idx,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0,-0.095075,-0.240884,-0.542729,2.209324,0.122539
0,1,1.500152,1.822750,2.882952,4.450264,2.673609
0,2,2.939520,3.567547,4.864963,4.930896,4.464728
0,3,4.073484,4.688307,4.410788,3.461105,4.986786
0,4,4.784229,4.988986,1.747861,0.622269,4.091392
...,...,...,...,...,...,...
99,5,4.835604,0.634449,4.634897,4.910111,4.815565
99,6,3.532665,2.066645,2.845170,3.281070,4.946122
99,7,1.263739,3.315606,0.121903,0.253339,4.998853
99,8,-1.350749,4.270597,-2.641363,-2.882388,4.972930


In [None]:
# Fit the scaler and apply it in a single step (alternatively, call fit() and
# then transform()), then take note of the new value range of the time series features.

dataset = model.fit_transform(dataset)

print("Min, max values per feature:")
scaled_ts_stats = dataset.time_series.dataframe().describe().T
display(scaled_ts_stats.loc[:, ["min", "max"]])

dataset.time_series

Min, max values per feature:


Unnamed: 0,min,max
0,-1.711349,1.200819
1,-1.724449,1.239101
2,-1.734762,1.230568
3,-1.592516,1.277314
4,-1.728804,1.17717


Unnamed: 0_level_0,Unnamed: 1_level_0,0,1,2,3,4
sample_idx,time_idx,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0,-0.283024,-0.314064,-0.413046,0.476436,-0.240201
0,1,0.181555,0.297505,0.602791,1.119549,0.501140
0,2,0.600744,0.814586,1.190527,1.257482,1.021640
0,3,0.930989,1.146729,1.055848,0.835676,1.173350
0,4,1.137980,1.235837,0.266196,0.020977,0.913149
...,...,...,...,...,...,...
99,5,1.152942,-0.054654,1.122305,1.251517,1.123594
99,6,0.773486,0.369785,0.591587,0.784009,1.161533
99,7,0.112705,0.739922,-0.215959,-0.084900,1.176857
99,8,-0.648715,1.022938,-1.035365,-0.984802,1.169324
