update documentation

yzhao062 · Jul 28, 2019 · b30b497 · b30b497
1 parent cafc970
commit b30b497
Show file tree

Hide file tree

Showing 3 changed files with 46 additions and 80 deletions.
diff --git a/README.rst b/README.rst
@@ -89,6 +89,26 @@ combo is featured for:
 * **Optimized performance with JIT and parallelization** when possible, using `numba <https://github.com/numba/numba>`_ and `joblib <https://github.com/joblib/joblib>`_.
 
 
+**API Demo**\ :
+
+
+   .. code-block:: python
+
+
+       from combo.models.stacking import Stacking
+       # base classifiers
+       classifiers = [DecisionTreeClassifier(), LogisticRegression(),
+                      KNeighborsClassifier(), RandomForestClassifier(),
+                      GradientBoostingClassifier()]
+
+       clf = Stacking(base_clfs=classifiers) # initialize a Stacking model
+       clf.fit(X_train, y_train)
+
+       # predict on unseen data
+       y_test_labels = clf.predict(X_test)  # label prediction
+       y_test_proba = clf.predict_proba(X_test)  # probability prediction
+
+
 **Table of Contents**\ :
 
 
@@ -211,11 +231,9 @@ demonstrates the basic API of predicting with multiple classifiers. **It is note
 
 
        # initialize a group of classifiers
-       classifiers = [DecisionTreeClassifier(random_state=random_state),
-                      LogisticRegression(random_state=random_state),
-                      KNeighborsClassifier(),
-                      RandomForestClassifier(random_state=random_state),
-                      GradientBoostingClassifier(random_state=random_state)]
+       classifiers = [DecisionTreeClassifier(), LogisticRegression(),
+                      KNeighborsClassifier(), RandomForestClassifier(),
+                      GradientBoostingClassifier()]
 
 
 #. Initialize, fit, predict, and evaluate with a simple aggregator (average)
@@ -308,11 +326,9 @@ demonstrates the basic API of stacking (meta ensembling).
 
 
        # initialize a group of classifiers
-       classifiers = [DecisionTreeClassifier(random_state=random_state),
-                      LogisticRegression(random_state=random_state),
-                      KNeighborsClassifier(),
-                      RandomForestClassifier(random_state=random_state),
-                      GradientBoostingClassifier(random_state=random_state)]
+       classifiers = [DecisionTreeClassifier(), LogisticRegression(),
+                      KNeighborsClassifier(), RandomForestClassifier(),
+                      GradientBoostingClassifier()]
 
 
 #. Initialize, fit, predict, and evaluate with Stacking

diff --git a/docs/index.rst b/docs/index.rst
@@ -94,6 +94,26 @@ combo is featured for:
 * **Optimized performance with JIT and parallelization** when possible, using `numba <https://github.com/numba/numba>`_ and `joblib <https://github.com/joblib/joblib>`_.
 
 
+**API Demo**\ :
+
+
+   .. code-block:: python
+
+
+       from combo.models.stacking import Stacking
+       # base classifiers
+       classifiers = [DecisionTreeClassifier(), LogisticRegression(),
+                      KNeighborsClassifier(), RandomForestClassifier(),
+                      GradientBoostingClassifier()]
+
+       clf = Stacking(base_clfs=classifiers) # initialize a Stacking model
+       clf.fit(X_train, y_train)
+
+       # predict on unseen data
+       y_test_labels = clf.predict(X_test)  # label prediction
+       y_test_proba = clf.predict_proba(X_test)  # probability prediction
+
+
 ----
 
 

diff --git a/examples/temp_do_not_use.py b/examples/temp_do_not_use.py
@@ -1,70 +0,0 @@
-# -*- coding: utf-8 -*-
-"""Example of combining multiple base classifiers. Two combination
-frameworks are demonstrated:
-
-1. Average: take the average of all base detectors
-2. maximization : take the maximum score across all detectors as the score
-
-"""
-# Author: Yue Zhao <zhaoy@cmu.edu>
-# License: BSD 2 clause
-
-
-import os
-import sys
-
-# temporary solution for relative imports in case combo is not installed
-# if combo is installed, no need to use the following line
-sys.path.append(
-    os.path.abspath(os.path.join(os.path.dirname("__file__"), '..')))
-
-import numpy as np
-
-from sklearn.cluster import KMeans
-from sklearn.cluster import MiniBatchKMeans
-from sklearn.cluster import AgglomerativeClustering
-
-from sklearn.datasets import load_breast_cancer
-from sklearn.preprocessing import StandardScaler
-
-from combo.models.cluster_comb import clusterer_ensemble_scores
-from combo.models.cluster_comb import ClustererEnsemble
-from combo.utils.utility import generate_bagging_indices
-
-import warnings
-
-warnings.filterwarnings("ignore")
-
-if __name__ == "__main__":
-    # Define data file and read X and y
-    random_state = 42
-    X, y = load_breast_cancer(return_X_y=True)
-
-    n_clusters = 5
-    n_estimators = 3
-
-    # Initialize a set of estimators
-    estimators = [KMeans(n_clusters=n_clusters),
-                  MiniBatchKMeans(n_clusters=n_clusters),
-                  AgglomerativeClustering(n_clusters=n_clusters)]
-
-    clf = ClustererEnsemble(estimators, n_clusters=n_clusters)
-    clf.fit(X)
-    predicted_labels = clf.labels_
-    aligned_labels = clf.aligned_labels_
-
-    # Clusterer Ensemble without initializing a new Class
-    original_labels = np.zeros([X.shape[0], n_estimators])
-
-    for i, estimator in enumerate(estimators):
-        estimator.fit(X)
-        original_labels[:, i] = estimator.labels_
-
-    # Invoke method directly without initializing a new Class
-    labels_by_vote1 = clusterer_ensemble_scores(original_labels, n_estimators,
-                                                n_clusters)
-    labels_by_vote2, aligned_labels = clusterer_ensemble_scores(
-        original_labels, n_estimators, n_clusters, return_results=True)
-
-    labels_by_vote3 = clusterer_ensemble_scores(original_labels, n_estimators,
-                                                n_clusters, reference_idx=1)