# Resampling

In [None]:
import pandas

In [None]:
# Load the CSV export, parse the 'time' column into timestamps, use it as the
# index and discard every row that contains at least one missing value.
df = (
    pandas.read_csv('Messstationen_Tagesdaten_v2_Datensatz_19900101_20250515.csv')
    .assign(time=lambda frame: pandas.to_datetime(frame['time']))
    .set_index('time')
    .dropna()
)
# Preview the first five rows of the cleaned frame.
df.head(n=5)

## Changing the frequency of a timeseries

In [None]:
# Show the first and last index entries. They are the earliest/latest
# timestamps only if the index is in chronological order.
print(f'start: {df.index[0]}', f'end:   {df.index[-1]}', sep='\n')

In [None]:
# Downsample to yearly averages. With the year-end alias ('YE') each new index
# label marks the END of its period, see
# https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.resample.html
yearly_bins = df.resample('1YE')
yearly_bins.mean()

In [None]:
# Monthly averages ('ME' = month end, so each row is labelled with the last
# day of its month)
df.resample(rule='1ME').mean()

### Resampling methods

In [None]:
# Largest value of each column within every calendar month
month_end_bins = df.resample('1ME')
month_end_bins.max()

In [None]:
# Variance of each column within every calendar month
# (pandas' default ddof=1, i.e. the sample variance)
df.resample(rule='1ME').var()

In [None]:
# Median of each column within every calendar month
per_month = df.resample('1ME')
per_month.median()

...

A full list of existing resampling strategies can be found here: https://pandas.pydata.org/docs/reference/resampling.html

### Other approaches

In [None]:
# Re-index to a month-end frequency WITHOUT aggregating: each row is simply the
# original row found at that exact timestamp (e.g. the 2002-01-31 00:00:00 row
# is identical to the one in the original dataframe) and everything in between
# is dropped. Month-end timestamps that are absent from the original data show
# up as NaN rows.
df.asfreq(freq='1ME')

## Upsampling
With the above, we only downsampled the data, i.e. we reduced the number of data points.
However, there are also strategies to upsample data, i.e. to increase the number of data points.

In [None]:
# pandas lets us resample to a finer (hourly) grid, but aggregating alone
# yields mostly NaN (Not a Number) rows: most hourly bins contain no original
# observation to compute a mean from.
df.resample(rule='1h').mean()

In [None]:
# Probably the simplest strategy is forward fill: every new timestamp takes
# over the last known value before it.
hourly_grid = df.resample('1h')
hourly_grid.ffill()

In [None]:
# Alternatively, interpolate between the known points. 'linear' is the default
# method and is only spelled out here for clarity.
df.resample('1h').interpolate(method='linear')

In [None]:
# There are several other interpolation methods (some of them, including
# 'cubic', are delegated to SciPy), see
# https://pandas.pydata.org/docs/reference/api/pandas.core.resample.Resampler.interpolate.html#pandas.core.resample.Resampler.interpolate
hourly_resampler = df.resample('1h')
hourly_resampler.interpolate(method='cubic')