Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix/simple statistics #773

Merged
merged 12 commits into from
Feb 10, 2022
4 changes: 3 additions & 1 deletion darts/metrics/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -515,7 +515,9 @@ def coefficient_of_variation(
"""

return (
100 * rmse(actual_series, pred_series, intersect) / actual_series.mean().mean()
100
* rmse(actual_series, pred_series, intersect)
/ actual_series.pd_dataframe(copy=False).mean().mean()
)


Expand Down
2 changes: 1 addition & 1 deletion darts/models/forecasting/theta.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ def fit(self, series):
self.length = len(series)
# normalization of data
if self.normalization:
self.mean = series.mean().mean()
self.mean = series.pd_dataframe(copy=False).mean().mean()
raise_if_not(
not np.isclose(self.mean, 0),
"The mean value of the provided series is too close to zero to perform normalization",
Expand Down
111 changes: 111 additions & 0 deletions darts/tests/test_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import xarray as xr
from tempfile import NamedTemporaryFile
from unittest.mock import patch
from scipy.stats import skew, kurtosis

from darts.tests.base_test_class import DartsBaseTestClass
from darts import TimeSeries, concatenate
Expand Down Expand Up @@ -1452,3 +1453,113 @@ def test_time_col_convert_garbage(self):

with self.assertRaises(AttributeError):
TimeSeries.from_dataframe(df=df, time_col="Time")


class SimpleStatisticsTestCase(DartsBaseTestClass):
gnwhr marked this conversation as resolved.
Show resolved Hide resolved

# Daily index from 2013-01-01 through 2013-01-10 inclusive (10 timestamps).
times = pd.date_range("20130101", "20130110", freq="D")
# Uniform samples in [0, 1) drawn from a seeded generator, so a failing run
# can be reproduced exactly instead of depending on global RNG state.
values = np.random.default_rng(42).random((10, 2, 1000))
gnwhr marked this conversation as resolved.
Show resolved Hide resolved
# Wrap the raw array in a labelled DataArray with the three dimensions
# (time, component, sample), then build the series under test from it.
ar = xr.DataArray(
    data=values,
    coords=dict(time=times, component=["a", "b"]),
    dims=("time", "component", "sample"),
)
ts = TimeSeries(ar)

def test_mean(self):
    """Compare ``TimeSeries.mean`` with ``numpy`` along each of the 3 axes.

    The reduced series' underlying array (``_xa.values``) must be close to
    the NumPy mean of the raw fixture computed with ``keepdims=True``.
    """
    for axis in (0, 1, 2):
        expected = self.values.mean(axis=axis, keepdims=True)
        reduced = self.ts.mean(axis=axis)
        self.assertTrue(np.allclose(reduced._xa.values, expected))

def test_var(self):
    """Compare ``TimeSeries.var`` with ``numpy`` for ``ddof`` in 0..4.

    The reference is the NumPy variance of the raw fixture taken over
    axis 2 with the same ``ddof``.
    """
    for ddof in (0, 1, 2, 3, 4):
        expected = self.values.var(axis=2, ddof=ddof)
        observed = self.ts.var(ddof=ddof).values()
        self.assertTrue(np.allclose(observed, expected))

def test_std(self):
    """Compare ``TimeSeries.std`` with ``numpy`` for ``ddof`` in 0..4.

    The reference is the NumPy standard deviation of the raw fixture taken
    over axis 2 with the same ``ddof``.
    """
    for ddof in (0, 1, 2, 3, 4):
        expected = self.values.std(axis=2, ddof=ddof)
        observed = self.ts.std(ddof=ddof).values()
        self.assertTrue(np.allclose(observed, expected))

def test_skew(self):
    """Compare ``TimeSeries.skew`` with ``scipy.stats.skew``.

    Both the biased and the bias-corrected estimators are checked; the
    reference is computed over axis 2 of the raw fixture.
    """
    for bias in (True, False):
        expected = skew(self.values, axis=2, bias=bias)
        observed = self.ts.skew(bias=bias).values()
        self.assertTrue(np.allclose(observed, expected))

def test_kurtosis(self):
    """Compare ``TimeSeries.kurtosis`` with ``scipy.stats.kurtosis``.

    Every combination of the ``bias`` and ``fisher`` flags is checked; the
    reference is computed over axis 2 of the raw fixture.
    """
    flags = (True, False)
    for bias in flags:
        for fisher in flags:
            expected = kurtosis(self.values, axis=2, bias=bias, fisher=fisher)
            observed = self.ts.kurtosis(bias=bias, fisher=fisher).values()
            self.assertTrue(np.allclose(observed, expected))

def test_min(self):
    """Compare ``TimeSeries.min`` with ``numpy`` along each of the 3 axes.

    The reduced series' underlying array (``_xa.values``) must be close to
    the NumPy minimum of the raw fixture computed with ``keepdims=True``.
    """
    for axis in (0, 1, 2):
        expected = self.values.min(axis=axis, keepdims=True)
        reduced = self.ts.min(axis=axis)
        self.assertTrue(np.allclose(reduced._xa.values, expected))

def test_max(self):
    """Compare ``TimeSeries.max`` with ``numpy`` along each of the 3 axes.

    The reduced series' underlying array (``_xa.values``) must be close to
    the NumPy maximum of the raw fixture computed with ``keepdims=True``.
    """
    for axis in (0, 1, 2):
        expected = self.values.max(axis=axis, keepdims=True)
        reduced = self.ts.max(axis=axis)
        self.assertTrue(np.allclose(reduced._xa.values, expected))

def test_sum(self):
    """Compare ``TimeSeries.sum`` with ``numpy`` along each of the 3 axes.

    The reduced series' underlying array (``_xa.values``) must be close to
    the NumPy sum of the raw fixture computed with ``keepdims=True``.
    """
    for axis in (0, 1, 2):
        expected = self.values.sum(axis=axis, keepdims=True)
        reduced = self.ts.sum(axis=axis)
        self.assertTrue(np.allclose(reduced._xa.values, expected))

def test_median(self):
    """Compare ``TimeSeries.median`` with ``numpy.median`` along each axis.

    The reduced series' underlying array (``_xa.values``) must be close to
    ``numpy.median`` of the raw fixture computed with ``keepdims=True``.
    """
    for axis in (0, 1, 2):
        expected = np.median(self.values, axis=axis, keepdims=True)
        reduced = self.ts.median(axis=axis)
        self.assertTrue(np.allclose(reduced._xa.values, expected))

def test_quantile(self):
    """Compare ``TimeSeries.quantile`` with ``numpy.quantile``.

    Each of the two interpolation methods is combined with each of four
    quantile levels; the reference is computed over axis 2 of the raw
    fixture with the same ``q`` and ``method``.
    """
    levels = (0.01, 0.1, 0.5, 0.95)
    for method in ("linear", "inverted_cdf"):
        for q in levels:
            expected = np.quantile(self.values, q=q, axis=2, method=method)
            observed = self.ts.quantile(quantile=q, method=method).values()
            self.assertTrue(np.allclose(observed, expected))
Loading