In [16]:
import time

import numpy as np

import pandas as pd

from catboost import CatBoostClassifier

from sklearn.datasets import make_classification

from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score

In [17]:
# Synthetic classification data: 100,000 samples with 20 features each.
# The fixed random_state makes the generated dataset identical on every run.
X, y = make_classification(n_samples=100_000, n_features=20, random_state=42)

In [18]:
# Hold out 30% of the samples for evaluation; the seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [19]:
# Train the CPU baseline and measure how long fit() takes.
#
# NOTE(review): the test split is also passed as eval_set. The recorded output
# of this cell ends with "Shrink model to first 497 iterations", i.e. the number
# of trees kept is chosen using test data, so accuracy measured on X_test
# afterwards is slightly optimistic. Consider a separate validation split
# (or use_best_model=False) if an unbiased test score is needed.
cpu_model = CatBoostClassifier(
    iterations=500,
    depth=6,
    learning_rate=0.01,
    random_state=42,
    verbose=100,  # log every 100th iteration; set once here, not repeated in fit()
)

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed intervals; time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()

cpu_model.fit(X_train, y_train, eval_set=(X_test, y_test))

cpu_time = time.perf_counter() - start_time

0:	learn: 0.6830033	test: 0.6830379	best: 0.6830379 (0)	total: 5.75ms	remaining: 2.87s
100:	learn: 0.3371647	test: 0.3391375	best: 0.3391375 (100)	total: 634ms	remaining: 2.5s
200:	learn: 0.3013799	test: 0.3043372	best: 0.3043372 (200)	total: 1.27s	remaining: 1.89s
300:	learn: 0.2937934	test: 0.2974295	best: 0.2974295 (300)	total: 1.91s	remaining: 1.26s
400:	learn: 0.2914085	test: 0.2957016	best: 0.2956985 (398)	total: 2.55s	remaining: 630ms
499:	learn: 0.2900212	test: 0.2952432	best: 0.2952409 (496)	total: 3.19s	remaining: 0us

bestTest = 0.2952409361
bestIteration = 496

Shrink model to first 497 iterations.


In [20]:
# Score the CPU-trained model: predict labels for the held-out rows and
# compare them with the true labels.
cpu_preds = cpu_model.predict(X_test)
cpu_acc = accuracy_score(y_true=y_test, y_pred=cpu_preds)

In [None]:
# Train the same configuration with task_type='GPU' and measure how long
# fit() takes, so the result can be compared with the CPU run.
#
# NOTE(review): X_test is passed as eval_set here as well; if CatBoost selects
# the best iteration from it (as the CPU cell's recorded log shows), accuracy
# measured on X_test afterwards is slightly optimistic.
# NOTE(review): presumably requires a supported GPU on the machine running
# the notebook -- confirm before relying on this cell.
gpu_model = CatBoostClassifier(
    iterations=500,
    depth=6,
    learning_rate=0.01,
    random_state=42,
    verbose=100,  # log every 100th iteration; set once here, not repeated in fit()
    task_type='GPU',
)

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed intervals; time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()

gpu_model.fit(X_train, y_train, eval_set=(X_test, y_test))

gpu_time = time.perf_counter() - start_time

In [None]:
# Score the GPU-trained model: predict labels for the held-out rows and
# compare them with the true labels.
gpu_preds = gpu_model.predict(X_test)
gpu_acc = accuracy_score(y_true=y_test, y_pred=gpu_preds)

In [21]:
# Report the CPU run: accuracy to 4 decimals, fit time to 2 decimals.
print(
    f"CPU Accuracy: {cpu_acc:.4f}, "
    f"Training Time: {cpu_time:.2f} sec"
)


CPU Accuracy: 0.8730, Training Time: 3.62 sec


In [None]:
# Report the GPU run: accuracy to 4 decimals, fit time to 2 decimals.
print(
    f"GPU Accuracy: {gpu_acc:.4f}, "
    f"Training Time: {gpu_time:.2f} sec"
)