unionai-oss · Abdullahi-Ahmed · Dec 30, 2022 · Dec 30, 2022 · Jan 7, 2023 · Jan 11, 2023
diff --git a/docs/source/index.md b/docs/source/index.md
@@ -130,7 +130,7 @@ A UnionML app is composed of two core classes: a {class}`~unionml.dataset.Datase
 {class}`~unionml.model.Model`.
 
 In this example, we'll build a minimal UnionML app that classifies images of handwritten digits
-into their corresponding digit labels using [sklearn](https://scikit-learn.org/stable/),
+into their corresponding digit labels using [sklearn](https://scikit-learn.org/stable/), [xgboost](https://xgboost.ai/),
 [pytorch](https://pytorch.org/), or [keras](https://keras.io/).
 
 Create a python file called `app.py`, import app dependencies, and define `dataset` and `model` objects.
@@ -146,7 +146,24 @@ Create a python file called `app.py`, import app dependencies, and define `datas
       ```
 
    ```
+   ```{group-tab} xgboost
 
+      Install [xgboost](https://xgboost.ai/):
+
+      ```{prompt} bash
+      :prompts: $
+
+      pip install xgboost
+      ```
+
+      ```{literalinclude} ../../tests/integration/xgboost_app/quickstart.py
+      ---
+      lines: 1-24
+      ---
+      ```
+
+   ```
+
    ```{group-tab} pytorch
 
       Install [pytorch](https://pytorch.org/):
@@ -208,6 +225,16 @@ exposed by the `dataset` and `model` objects:
 
    ```
 
+   ```{group-tab} xgboost
+
+      ```{literalinclude} ../../tests/integration/xgboost_app/quickstart.py
+      ---
+      lines: 27-44
+      ---
+      ```
+
+   ```
+
    ```{group-tab} pytorch
 
       First we'll define some helper functions to convert dataframes to tensors
@@ -257,6 +284,16 @@ to generate predictions.
 
    ```
 
+   ```{group-tab} xgboost
+
+      ```{literalinclude} ../../tests/integration/xgboost_app/quickstart.py
+      ---
+      lines: 46-52
+      ---
+      ```
+
+   ```
+
    ```{group-tab} pytorch
 
       ```{literalinclude} ../../tests/integration/pytorch_app/quickstart.py
@@ -305,6 +342,24 @@ endpoints with the `requests` library.
 
    ```
 
+   ```{group-tab} xgboost
+
+      Bind a FastAPI `app` to the `model` object with `model.serve`
+
+      ```{literalinclude} ../../tests/integration/xgboost_app/fastapi_app.py
+      ---
+      lines: 1,4-6
+      ---
+      ```
+
+      Start the server, assuming the UnionML app is in an `app.py` script
+
+      ```{code-block} bash
+      unionml serve app:app --reload --model-path /tmp/model_object.pickle
+      ```
+
+   ```
+
    ```{group-tab} pytorch
 
       Bind a FastAPI `app` to the `model` object with `model.serve`

diff --git a/tests/integration/test_fastapi.py b/tests/integration/test_fastapi.py
@@ -48,10 +48,11 @@ def assert_health_check():
     "ml_framework, model_cls_name, model_checker",
     [
         ("sklearn", "LogisticRegression", check_is_fitted),
+        ("xgboost", "XGBClassifier", None),
         ("pytorch", "PytorchModel", None),
         ("keras", "Sequential", None),
     ],
-    ids=["sklearn", "pytorch", "keras"],
+    ids=["sklearn", "xgboost", "pytorch", "keras"],
 )
 def test_module(ml_framework, model_cls_name, model_checker):
 
@@ -77,10 +78,11 @@ def test_module(ml_framework, model_cls_name, model_checker):
     "ml_framework, filename",
     [
         ("sklearn", "model.joblib"),
+        ("xgboost", "model.pickle"),
         ("pytorch", "model.pt"),
         ("keras", "model.h5"),
     ],
-    ids=["sklearn", "pytorch", "keras"],
+    ids=["sklearn", "xgboost", "pytorch", "keras"],
 )
 def test_fastapi_app(ml_framework, filename, tmp_path):
     # run the quickstart module to train a model

diff --git a/tests/integration/xgboost_app/__init__.py b/tests/integration/xgboost_app/__init__.py
diff --git a/tests/integration/xgboost_app/fastapi_app.py b/tests/integration/xgboost_app/fastapi_app.py
@@ -0,0 +1,6 @@
+from fastapi import FastAPI
+
+from tests.integration.xgboost_app.quickstart import model
+
+app = FastAPI()
+model.serve(app)  # bind the FastAPI app to the UnionML model object
diff --git a/tests/integration/xgboost_app/quickstart.py b/tests/integration/xgboost_app/quickstart.py
@@ -0,0 +1,52 @@
+from typing import List
+import pandas as pd
+from sklearn.datasets import load_digits
+from xgboost import XGBClassifier
+from sklearn.metrics import accuracy_score
+
+from unionml import Dataset, Model
+
+# Declare constants and variables at the top of the file
+DATASET_NAME = "digits_dataset"
+MODEL_NAME = "digits_classifier"
+TEST_SIZE = 0.2
+SHUFFLE = True
+TARGETS = ["target"]
+PARAMS = {
+    'max_depth': 4,
+    'eta': 0.1,
+    'sampling_method': 'gradient_based',
+    'num_class': 10,  # load_digits has 10 target classes (digits 0-9)
+}
+
+# Create the Dataset, then a Model bound to it that uses XGBClassifier as its init callable
+dataset = Dataset(name=DATASET_NAME, test_size=TEST_SIZE, shuffle=SHUFFLE, targets=TARGETS)
+model = Model(name=MODEL_NAME, init=XGBClassifier, dataset=dataset)
+
+# Reader: load the scikit-learn digits dataset as a single pandas DataFrame
+@dataset.reader
+def reader() -> pd.DataFrame:
+    return load_digits(as_frame=True).frame
+
+# Trainer: fit the estimator on the features; squeeze() collapses the target frame before fitting
+@model.trainer
+def trainer(estimator: XGBClassifier, features: pd.DataFrame, target: pd.DataFrame) -> XGBClassifier:
+    return estimator.fit(features, target.squeeze())
+
+# Predictor: return the estimator's predictions as a list of built-in floats
+@model.predictor
+def predictor(estimator: XGBClassifier, features: pd.DataFrame) -> List[float]:
+    return [float(x) for x in estimator.predict(features)]
+
+# Evaluator: accuracy of the predictor() output against the true target labels
+@model.evaluator
+def evaluator(estimator: XGBClassifier, features: pd.DataFrame, target: pd.DataFrame) -> float:
+    return float(accuracy_score(target.squeeze(), predictor(estimator, features)))
+
+if __name__ == "__main__":
+    model_object, metrics = model.train(hyperparameters=PARAMS)  # train with the PARAMS hyperparameters
+    predictions = model.predict(features=load_digits(as_frame=True).frame.sample(5, random_state=42))
+    print(model_object, metrics, predictions, sep="\n")
+
+    # save the trained model; the docs' `unionml serve` example points --model-path at this same file
+    model.save('/tmp/model_object.pickle')