Reformat code using black (#474)
* reformat code using black
* use the latest black (22.1.0) to be consistent with the version used by the GitHub Action
* update CONTRIBUTING.md with information about black
jeongyoonlee committed Feb 14, 2022
1 parent 92767c3 · commit 7dec7fe
Showing 53 changed files with 4,223 additions and 2,317 deletions.
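As an illustrative aside (not part of this commit): since the commit message standardizes on black 22.1.0 to match CI, a quick local sanity check that your formatter agrees with the GitHub Action might look like the following. The check itself is hypothetical; `black.__version__` is simply the package's reported version.

```python
# Hypothetical sanity check: confirm the locally installed black matches the
# version this commit standardizes on (22.1.0 at the time), so local runs
# agree with the CI formatting check.
import black

print(black.__version__)  # expect "22.1.0"
```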
10 changes: 10 additions & 0 deletions .github/workflows/black.yml
@@ -0,0 +1,10 @@
+name: Lint
+
+on: [push, pull_request]
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - uses: psf/black@stable
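This workflow runs the official `psf/black` action, which by default runs black in check mode and fails the job when any file would be reformatted. A minimal sketch of what that check amounts to, using black's Python API (`format_str`/`Mode` are internal APIs with no stability guarantee; the supported interface is the `black` CLI):

```python
# Sketch of the check the Lint workflow performs: format the source in memory
# and see whether anything would change. Uses black's internal Python API.
import black

src = "x = {  'a':37,'b':42}\n"
formatted = black.format_str(src, mode=black.Mode())

print(formatted, end="")   # x = {"a": 37, "b": 42}
print(src == formatted)    # False -> the CI check would flag this file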
12 changes: 9 additions & 3 deletions CONTRIBUTING.md
@@ -5,10 +5,16 @@ To contribute to it, please follow guidelines here.
 
 The codebase is hosted on Github at https://github.com/uber/causalml.
 
-All code need to follow the [PEP8 style guide](https://www.python.org/dev/peps/pep-0008/) with a few exceptions listed in [tox.ini](./tox.ini).
+We use [`black`](https://black.readthedocs.io/en/stable/index.html) as a formatter to keep the coding style and format across all Python files consistent and compliant with [PEP8](https://www.python.org/dev/peps/pep-0008/). We recommend that you add `black` to your IDE as a formatter (see the [instruction](https://black.readthedocs.io/en/stable/integrations/editors.html)) or run `black` on the command line before submitting a PR as follows:
+```bash
+# move to the top directory of the causalml repository
+$ cd causalml
+$ pip install -U black
+$ black .
+```
 
-Before contributing, please review outstanding issues.
-If you'd like to contribute to something else, open an issue for discussion first.
+As a start, please check out outstanding [issues](https://github.com/uber/causalml/issues).
+If you'd like to contribute to something else, open a new issue for discussion first.
 
 ## Development Workflow :computer:
 
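The new CONTRIBUTING text asks contributors to run `black` before submitting a PR. An illustrative pre-push helper (not part of the causalml repo; `--check` and `--diff` are standard black CLI flags) that mirrors the CI check without rewriting any files:

```python
# Illustrative helper (hypothetical, not in the causalml repo): run the same
# formatting check CI runs; exit code 0 means nothing would change.
import subprocess

result = subprocess.run(
    ["black", "--check", "--diff", "."],
    capture_output=True,
    text=True,
)
print(result.stdout or result.stderr)
print("clean" if result.returncode == 0 else "run `black .` before pushing")
```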
22 changes: 12 additions & 10 deletions causalml/__init__.py
@@ -1,10 +1,12 @@
-name = 'causalml'
-__version__ = '0.12.1'
-__all__ = ['dataset',
-           'features',
-           'feature_selection',
-           'inference',
-           'match',
-           'metrics',
-           'optimize',
-           'propensity']
+name = "causalml"
+__version__ = "0.12.1"
+__all__ = [
+    "dataset",
+    "features",
+    "feature_selection",
+    "inference",
+    "match",
+    "metrics",
+    "optimize",
+    "propensity",
+]
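Note that black exploded `__all__` to one element per line (the single-line form would exceed the default 88-character limit) and added a trailing comma; that "magic trailing comma" keeps the collection exploded on later runs even when it would fit. A quick illustration, assuming black is installed (`format_str` is an internal API):

```python
# Demonstrates black's "magic trailing comma": a trailing comma inside a
# collection forces one element per line even when the collection fits.
import black

print(black.format_str('__all__ = ["dataset", "features",]', mode=black.Mode()))
# __all__ = [
#     "dataset",
#     "features",
# ]
```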
175 changes: 117 additions & 58 deletions causalml/dataset/classification.py
@@ -3,21 +3,39 @@
 from sklearn.datasets import make_classification
 
 
-def make_uplift_classification(n_samples=1000,
-                               treatment_name=['control', 'treatment1', 'treatment2', 'treatment3'],
-                               y_name='conversion',
-                               n_classification_features=10,
-                               n_classification_informative=5,
-                               n_classification_redundant=0,
-                               n_classification_repeated=0,
-                               n_uplift_increase_dict={'treatment1': 2, 'treatment2': 2, 'treatment3': 2},
-                               n_uplift_decrease_dict={'treatment1': 0, 'treatment2': 0, 'treatment3': 0},
-                               delta_uplift_increase_dict={'treatment1': 0.02, 'treatment2': 0.05, 'treatment3': 0.1},
-                               delta_uplift_decrease_dict={'treatment1': 0., 'treatment2': 0., 'treatment3': 0.},
-                               n_uplift_increase_mix_informative_dict={'treatment1': 1, 'treatment2': 1, 'treatment3': 1},
-                               n_uplift_decrease_mix_informative_dict={'treatment1': 0, 'treatment2': 0, 'treatment3': 0},
-                               positive_class_proportion=0.5,
-                               random_seed=20190101):
+def make_uplift_classification(
+    n_samples=1000,
+    treatment_name=["control", "treatment1", "treatment2", "treatment3"],
+    y_name="conversion",
+    n_classification_features=10,
+    n_classification_informative=5,
+    n_classification_redundant=0,
+    n_classification_repeated=0,
+    n_uplift_increase_dict={"treatment1": 2, "treatment2": 2, "treatment3": 2},
+    n_uplift_decrease_dict={"treatment1": 0, "treatment2": 0, "treatment3": 0},
+    delta_uplift_increase_dict={
+        "treatment1": 0.02,
+        "treatment2": 0.05,
+        "treatment3": 0.1,
+    },
+    delta_uplift_decrease_dict={
+        "treatment1": 0.0,
+        "treatment2": 0.0,
+        "treatment3": 0.0,
+    },
+    n_uplift_increase_mix_informative_dict={
+        "treatment1": 1,
+        "treatment2": 1,
+        "treatment3": 1,
+    },
+    n_uplift_decrease_mix_informative_dict={
+        "treatment1": 0,
+        "treatment2": 0,
+        "treatment3": 0,
+    },
+    positive_class_proportion=0.5,
+    random_seed=20190101,
+):
     """Generate a synthetic dataset for classification uplift modeling problem.
     Parameters
@@ -90,33 +108,44 @@ def make_uplift_classification(n_samples=1000,
     for ti in treatment_name:
         treatment_list += [ti] * n_samples
     treatment_list = np.random.permutation(treatment_list)
-    df_res['treatment_group_key'] = treatment_list
+    df_res["treatment_group_key"] = treatment_list
 
     # generate features and labels
-    X1, Y1 = make_classification(n_samples=n_all, n_features=n_classification_features,
-                                 n_informative=n_classification_informative, n_redundant=n_classification_redundant,
-                                 n_repeated=n_classification_repeated, n_clusters_per_class=1,
-                                 weights=[1-positive_class_proportion, positive_class_proportion])
+    X1, Y1 = make_classification(
+        n_samples=n_all,
+        n_features=n_classification_features,
+        n_informative=n_classification_informative,
+        n_redundant=n_classification_redundant,
+        n_repeated=n_classification_repeated,
+        n_clusters_per_class=1,
+        weights=[1 - positive_class_proportion, positive_class_proportion],
+    )
 
     x_name = []
     x_informative_name = []
     for xi in range(n_classification_informative):
-        x_name_i = 'x' + str(len(x_name)+1) + '_informative'
+        x_name_i = "x" + str(len(x_name) + 1) + "_informative"
         x_name.append(x_name_i)
         x_informative_name.append(x_name_i)
         df_res[x_name_i] = X1[:, xi]
     for xi in range(n_classification_redundant):
-        x_name_i = 'x' + str(len(x_name)+1) + '_redundant'
+        x_name_i = "x" + str(len(x_name) + 1) + "_redundant"
         x_name.append(x_name_i)
-        df_res[x_name_i] = X1[:, n_classification_informative+xi]
+        df_res[x_name_i] = X1[:, n_classification_informative + xi]
     for xi in range(n_classification_repeated):
-        x_name_i = 'x' + str(len(x_name)+1) + '_repeated'
+        x_name_i = "x" + str(len(x_name) + 1) + "_repeated"
         x_name.append(x_name_i)
-        df_res[x_name_i] = X1[:, n_classification_informative+n_classification_redundant+xi]
-
-    for xi in range(n_classification_features - n_classification_informative - n_classification_redundant
-                    - n_classification_repeated):
-        x_name_i = 'x' + str(len(x_name)+1) + '_irrelevant'
+        df_res[x_name_i] = X1[
+            :, n_classification_informative + n_classification_redundant + xi
+        ]
+
+    for xi in range(
+        n_classification_features
+        - n_classification_informative
+        - n_classification_redundant
+        - n_classification_repeated
+    ):
+        x_name_i = "x" + str(len(x_name) + 1) + "_irrelevant"
         x_name.append(x_name_i)
         df_res[x_name_i] = np.random.normal(0, 1, n_all)
 
@@ -127,57 +156,87 @@ def make_uplift_classification(n_samples=1000,
 
     # generate uplift (positive)
     for treatment_key_i in treatment_name:
-        treatment_index = df_res.index[df_res['treatment_group_key'] == treatment_key_i].tolist()
-        if treatment_key_i in n_uplift_increase_dict and n_uplift_increase_dict[treatment_key_i] > 0:
+        treatment_index = df_res.index[
+            df_res["treatment_group_key"] == treatment_key_i
+        ].tolist()
+        if (
+            treatment_key_i in n_uplift_increase_dict
+            and n_uplift_increase_dict[treatment_key_i] > 0
+        ):
             x_uplift_increase_name = []
-            adjust_class_proportion = (delta_uplift_increase_dict[treatment_key_i]) / (1-positive_class_proportion)
-            X_increase, Y_increase = make_classification(n_samples=n_all,
-                                                         n_features=n_uplift_increase_dict[treatment_key_i],
-                                                         n_informative=n_uplift_increase_dict[treatment_key_i],
-                                                         n_redundant=0,
-                                                         n_clusters_per_class=1,
-                                                         weights=[1-adjust_class_proportion, adjust_class_proportion])
+            adjust_class_proportion = (delta_uplift_increase_dict[treatment_key_i]) / (
+                1 - positive_class_proportion
+            )
+            X_increase, Y_increase = make_classification(
+                n_samples=n_all,
+                n_features=n_uplift_increase_dict[treatment_key_i],
+                n_informative=n_uplift_increase_dict[treatment_key_i],
+                n_redundant=0,
+                n_clusters_per_class=1,
+                weights=[1 - adjust_class_proportion, adjust_class_proportion],
+            )
             for xi in range(n_uplift_increase_dict[treatment_key_i]):
-                x_name_i = 'x' + str(len(x_name)+1) + '_uplift_increase'
+                x_name_i = "x" + str(len(x_name) + 1) + "_uplift_increase"
                 x_name.append(x_name_i)
                 x_uplift_increase_name.append(x_name_i)
                 df_res[x_name_i] = X_increase[:, xi]
             Y[treatment_index] = Y[treatment_index] + Y_increase[treatment_index]
             if n_uplift_increase_mix_informative_dict[treatment_key_i] > 0:
-                for xi in range(n_uplift_increase_mix_informative_dict[treatment_key_i]):
-                    x_name_i = 'x' + str(len(x_name)+1) + '_increase_mix'
+                for xi in range(
+                    n_uplift_increase_mix_informative_dict[treatment_key_i]
+                ):
+                    x_name_i = "x" + str(len(x_name) + 1) + "_increase_mix"
                     x_name.append(x_name_i)
-                    df_res[x_name_i] = (np.random.uniform(-1, 1) * df_res[np.random.choice(x_informative_name)]
-                                        + np.random.uniform(-1, 1) * df_res[np.random.choice(x_uplift_increase_name)])
+                    df_res[x_name_i] = (
+                        np.random.uniform(-1, 1)
+                        * df_res[np.random.choice(x_informative_name)]
+                        + np.random.uniform(-1, 1)
+                        * df_res[np.random.choice(x_uplift_increase_name)]
+                    )
 
     # generate uplift (negative)
     for treatment_key_i in treatment_name:
-        treatment_index = df_res.index[df_res['treatment_group_key'] == treatment_key_i].tolist()
-        if treatment_key_i in n_uplift_decrease_dict and n_uplift_decrease_dict[treatment_key_i] > 0:
+        treatment_index = df_res.index[
+            df_res["treatment_group_key"] == treatment_key_i
+        ].tolist()
+        if (
+            treatment_key_i in n_uplift_decrease_dict
+            and n_uplift_decrease_dict[treatment_key_i] > 0
+        ):
             x_uplift_decrease_name = []
-            adjust_class_proportion = (delta_uplift_decrease_dict[treatment_key_i]) / (1-positive_class_proportion)
-            X_decrease, Y_decrease = make_classification(n_samples=n_all,
-                                                         n_features=n_uplift_decrease_dict[treatment_key_i],
-                                                         n_informative=n_uplift_decrease_dict[treatment_key_i],
-                                                         n_redundant=0,
-                                                         n_clusters_per_class=1,
-                                                         weights=[1-adjust_class_proportion, adjust_class_proportion])
+            adjust_class_proportion = (delta_uplift_decrease_dict[treatment_key_i]) / (
+                1 - positive_class_proportion
+            )
+            X_decrease, Y_decrease = make_classification(
+                n_samples=n_all,
+                n_features=n_uplift_decrease_dict[treatment_key_i],
+                n_informative=n_uplift_decrease_dict[treatment_key_i],
+                n_redundant=0,
+                n_clusters_per_class=1,
+                weights=[1 - adjust_class_proportion, adjust_class_proportion],
+            )
             for xi in range(n_uplift_decrease_dict[treatment_key_i]):
-                x_name_i = 'x' + str(len(x_name)+1) + '_uplift_decrease'
+                x_name_i = "x" + str(len(x_name) + 1) + "_uplift_decrease"
                 x_name.append(x_name_i)
                 x_uplift_decrease_name.append(x_name_i)
                 df_res[x_name_i] = X_decrease[:, xi]
             Y[treatment_index] = Y[treatment_index] - Y_decrease[treatment_index]
             if n_uplift_decrease_mix_informative_dict[treatment_key_i] > 0:
-                for xi in range(n_uplift_decrease_mix_informative_dict[treatment_key_i]):
-                    x_name_i = 'x' + str(len(x_name)+1) + '_decrease_mix'
+                for xi in range(
+                    n_uplift_decrease_mix_informative_dict[treatment_key_i]
+                ):
+                    x_name_i = "x" + str(len(x_name) + 1) + "_decrease_mix"
                     x_name.append(x_name_i)
-                    df_res[x_name_i] = (np.random.uniform(-1, 1) * df_res[np.random.choice(x_informative_name)]
-                                        + np.random.uniform(-1, 1) * df_res[np.random.choice(x_uplift_decrease_name)])
+                    df_res[x_name_i] = (
+                        np.random.uniform(-1, 1)
+                        * df_res[np.random.choice(x_informative_name)]
+                        + np.random.uniform(-1, 1)
+                        * df_res[np.random.choice(x_uplift_decrease_name)]
+                    )
 
     # truncate Y
     Y = np.clip(Y, 0, 1)
 
     df_res[y_name] = Y
-    df_res['treatment_effect'] = Y - Y1
+    df_res["treatment_effect"] = Y - Y1
     return df_res, x_name
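For context, a minimal usage sketch of the reformatted function. It relies only on what the diff above shows: the keyword defaults in the signature and the `return df_res, x_name` tuple (DataFrame plus generated feature-column names); the import path follows causalml's documented `causalml.dataset` package.

```python
# Minimal usage sketch of make_uplift_classification, based on the signature
# and return value shown in this diff. All other keywords use the defaults.
from causalml.dataset import make_uplift_classification

df, x_names = make_uplift_classification(n_samples=1000, random_seed=20190101)

print(df["treatment_group_key"].value_counts())                # control + 3 treatments
print(df.groupby("treatment_group_key")["conversion"].mean())  # response rate by arm
print(len(x_names), "generated feature columns")
```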
(Diffs for the remaining changed files are not shown.)
