Commit c3e7da5

small refactoring for better api doc

wingechr committed Dec 20, 2023
1 parent 5530348 commit c3e7da5
Showing 11 changed files with 53 additions and 53 deletions.
3 changes: 3 additions & 0 deletions .prettierignore
@@ -0,0 +1,3 @@
# this file contains non standard yaml sections for options
# that must not be corrected by prettier
docs/api.md
14 changes: 2 additions & 12 deletions data_disaggregation/__init__.py
@@ -1,15 +1,5 @@
__version__ = "0.10.0"

# isort: skip_file -> keep order to prevent circular import
from .ext import transform_pandas
from .base import transform
from .classes import VT_Nominal, VT_Numeric, VT_NumericExt, VT_Ordinal
from . import actions, types

__all__ = [
"transform",
"transform_pandas",
"VT_Nominal",
"VT_Ordinal",
"VT_Numeric",
"VT_NumericExt",
]
__all__ = ["actions", "types"]
7 changes: 7 additions & 0 deletions data_disaggregation/actions.py
@@ -0,0 +1,7 @@
"""Functions to perform data transformations.
"""

from .base import transform
from .ext import transform_pandas

__all__ = ["transform", "transform_pandas"]
4 changes: 2 additions & 2 deletions data_disaggregation/base.py
@@ -48,7 +48,7 @@

from typing import Mapping, Tuple

from .classes import F, T, V, VariableType, VT_NumericExt
from .types import F, T, V, VT_NumericExt, _AbstractVariableType
from .utils import (
as_set,
group_idx_first,
@@ -65,7 +65,7 @@


def transform(
vtype: VariableType,
vtype: _AbstractVariableType,
data: Mapping[F, V],
weight_map: Mapping[Tuple[F, T], float],
weights_from: Mapping[F, float] = None,
4 changes: 2 additions & 2 deletions data_disaggregation/ext.py
@@ -6,7 +6,7 @@
from pandas import DataFrame, Index, MultiIndex, Series

from .base import transform
from .classes import SCALAR_DIM_NAME, SCALAR_INDEX_KEY, VariableType
from .types import SCALAR_DIM_NAME, SCALAR_INDEX_KEY, _AbstractVariableType
from .utils import is_scalar

IDX_SCALAR = MultiIndex.from_product([Index([SCALAR_INDEX_KEY], name=SCALAR_DIM_NAME)])
@@ -189,7 +189,7 @@ def validate_multiindex(item: Union[Index, Series, DataFrame]):


def transform_pandas(
vtype: VariableType,
vtype: _AbstractVariableType,
data: Union[DataFrame, Series, float],
weights: Union[Index, Series, Tuple[Union[Index, Series]]],
dim_in: Union[Index, Series] = None,
data_disaggregation/classes.py → data_disaggregation/types.py
@@ -1,4 +1,4 @@
"""classes and types
"""Type classes for data.
"""

from abc import ABC
@@ -15,7 +15,7 @@
SCALAR_INDEX_KEY = "__SCALAR__"


class VariableType(ABC):
class _AbstractVariableType(ABC):
@classmethod
def weighted_aggregate(cls, data):
"""aggregate data
@@ -33,7 +33,7 @@ def weighted_aggregate(cls, data):
raise NotImplementedError()


class VT_Nominal(VariableType):
class VT_Nominal(_AbstractVariableType):
"""Type class for nominal (categorical) data.
- Aggregation method: mode (most commonly used)
@@ -57,7 +57,7 @@ def weighted_aggregate(cls, data):
return utils.weighted_median(data)


class VT_Numeric(VariableType):
class VT_Numeric(_AbstractVariableType):
"""Type class for numerical, intensive data.
An intensive variable is one which does not scale with the system size.
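
The intensive/extensive distinction in these docstrings drives how values are combined during aggregation. A minimal illustration of the idea in plain Python, independent of the library API (region names and weights below are made up for illustration):

```python
# Illustration only, not the library API: merging two regions with
# area-proportional weights.
areas = {"A": 2.0, "B": 1.0}

temperature = {"A": 10.0, "B": 16.0}  # intensive: does not scale with size -> weighted mean
population = {"A": 2000, "B": 500}    # extensive: scales with size -> plain sum

total_area = sum(areas.values())
temperature_merged = sum(temperature[k] * areas[k] for k in areas) / total_area  # 12.0
population_merged = sum(population.values())  # 2500
```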
8 changes: 4 additions & 4 deletions data_disaggregation/utils.py
@@ -5,7 +5,7 @@

from pandas import DataFrame, Index, Series

from . import classes
from . import types


def group_sum(key_vals: Mapping, get_key: Callable = None) -> Mapping:
@@ -177,16 +177,16 @@ def as_mapping(x, default_val=1) -> Mapping:
elif is_list(x):
return dict((k, default_val) for k in x)
elif is_scalar(x):
return {classes.SCALAR_INDEX_KEY: x}
return {types.SCALAR_INDEX_KEY: x}
raise TypeError(x)


def as_scalar(x):
if is_scalar(x):
return x
elif is_mapping(x):
assert set(x.keys()) == set([classes.SCALAR_INDEX_KEY])
return x[classes.SCALAR_INDEX_KEY]
assert set(x.keys()) == set([types.SCALAR_INDEX_KEY])
return x[types.SCALAR_INDEX_KEY]
raise TypeError(x)


8 changes: 7 additions & 1 deletion docs/api.md
@@ -1,3 +1,9 @@
# API

::: data_disaggregation
::: data_disaggregation.types

::: data_disaggregation.actions
options:
members:
- transform
- transform_pandas
34 changes: 13 additions & 21 deletions docs/index.ipynb
@@ -5,43 +5,35 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Theory\n",
"\n",
"Conceptually, aggregation/disaggregation operations are\n",
"\n",
"* start with *indexed data* (index can be multidimensional)\n",
"* use a *weight map* to map data to a new (multidimensional) index. Each key is a pair of (old index, new index).\n",
"* group values for each unique in the new index and use a *weighted aggregation*, which depends on the *variable type*, e.g.nominal, ordinal, numerical (intensive, extensive)\n"
"# Usage"
]
},
{
"cell_type": "code",
"execution_count": 1,
"cell_type": "markdown",
"metadata": {},
"outputs": [],
"source": [
"from pandas import Series, Index, MultiIndex\n",
"from data_disaggregation import transform, create_weight_map, VT_Numeric, VT_NumericExt"
"## Installation\n",
"\n",
"```bash\n",
" pip install data-disaggregation\n",
"```"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"To make the creation of the *weight map* easier, we create it from pandas Series:\n",
"\n",
"* if we don't specify the output dimensions, we use dims(weights) - dims(input)\n",
"* we create a cross product of all the (unique) input and output dimemnsions\n",
"* we join the weight on all the applicaple dimensions (so the weights dimensions must be a nonempty subset of the other dimensions)\n"
"## Quickstart Examples"
]
},
{
"attachments": {},
"cell_type": "markdown",
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"Note: for aggregation of extensive data, the weight value doe not really matter, but it does for intensive data. For disaggregation, it's the other way round."
"from pandas import Series, Index, MultiIndex\n",
"import data_disaggregation as dd"
]
},
{
2 changes: 2 additions & 0 deletions mkdocs.yml
@@ -35,6 +35,8 @@ plugins:
python:
options:
show_source: false
show_root_heading: true
show_root_full_path: true
docstring_style: numpy
- search:
lang: en
14 changes: 7 additions & 7 deletions test/test.py
@@ -7,13 +7,6 @@
from pandas import DataFrame, Index, MultiIndex, Series

from data_disaggregation.base import transform
from data_disaggregation.classes import (
SCALAR_INDEX_KEY,
VT_Nominal,
VT_Numeric,
VT_NumericExt,
VT_Ordinal,
)
from data_disaggregation.ext import (
COL_FROM,
COL_TO,
@@ -26,6 +19,13 @@
remap_series_to_frame,
transform_pandas,
)
from data_disaggregation.types import (
SCALAR_INDEX_KEY,
VT_Nominal,
VT_Numeric,
VT_NumericExt,
VT_Ordinal,
)
from data_disaggregation.utils import (
as_mapping,
group_idx_first,
