Merge pull request #661 from ras44/ras44/651_GraphViz

ras44/651 graph viz, resolves #651
uber · Aug 22, 2023 · 41aa6bd · 41aa6bd
2 parents c0e3ec5 + 645fd55
commit 41aa6bd
Show file tree

Hide file tree

Showing 3 changed files with 41 additions and 5 deletions.
diff --git a/.github/workflows/test-build-from-source.yml b/.github/workflows/test-build-from-source.yml
@@ -53,6 +53,8 @@ jobs:
       - name: install cxx-compiler 
         run: |
           conda install -c conda-forge cxx-compiler
+          conda install python-graphviz
+          conda install -c conda-forge xorg-libxrender
 
       - name: echo conda config
         run: |

diff --git a/README.md b/README.md
@@ -109,13 +109,14 @@ pip install -U numpy							# this step is necessary to fix [#338](https://github
 
 ## Install from source:
 
-### gcc, g++
-`gcc` and `g++` must be installed on the system to compile C/C++ libraries.
-
-For example, on ubuntu this can be done with:
+### Create a clean conda environment
 
 ```
-sudo apt-get install -y gcc g++
+conda create -n causalml-py38 python=3.8
+conda activate causalml-py38
+conda install -c conda-forge cxx-compiler
+conda install python-graphviz
+conda install -c conda-forge xorg-libxrender
 ```
 
 Then:
@@ -124,6 +125,7 @@ Then:
 git clone https://github.com/uber/causalml.git
 cd causalml
 pip install .
+python setup.py build_ext --inplace
 ```
 
 with `tensorflow`:

diff --git a/tests/test_uplift_trees.py b/tests/test_uplift_trees.py
@@ -8,6 +8,8 @@
 
 from causalml.inference.tree import UpliftTreeClassifier, UpliftRandomForestClassifier
 from causalml.metrics import get_cumgain
+from causalml.dataset import make_uplift_classification
+from causalml.inference.tree import uplift_tree_string, uplift_tree_plot
 
 from .const import RANDOM_SEED, N_SAMPLE, CONTROL_NAME, TREATMENT_NAMES, CONVERSION
 
@@ -247,3 +249,33 @@ def getNonleafCount(node):
     # would evaluate the same feature, thus the number of features with importance value
     # shouldn't be larger than the number of non-leaf node
     assert num_non_zero_imp_features <= num_non_leaf_nodes
+
+def test_uplift_tree_visualization():
+
+    # Data generation
+    df, x_names = make_uplift_classification()
+
+    # Rename features for easy interpretation of visualization
+    x_names_new = ['feature_%s'%(i) for i in range(len(x_names))]
+    rename_dict = {x_names[i]:x_names_new[i] for i in range(len(x_names))}
+    df = df.rename(columns=rename_dict)
+    x_names = x_names_new
+
+    df.head()
+
+    df = df[df['treatment_group_key'].isin(['control','treatment1'])]
+
+    # Split data to training and testing samples for model validation (next section)
+    df_train, df_test = train_test_split(df, test_size=0.2, random_state=111)
+
+    # Train uplift tree
+    uplift_model = UpliftTreeClassifier(max_depth = 4, min_samples_leaf = 200, min_samples_treatment = 50, n_reg = 100, evaluationFunction='KL', control_name='control')
+
+    uplift_model.fit(df_train[x_names].values,
+                     treatment=df_train['treatment_group_key'].values,
+                     y=df_train['conversion'].values)
+
+    # Plot uplift tree
+    graph = uplift_tree_plot(uplift_model.fitted_uplift_tree,x_names)
+    graph.create_png()
+