In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import SGDClassifier
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [2]:
# Synthetic two-class problem: 1000 samples described by 10 features.
# The seed is fixed so the same data is produced on every run.
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    random_state=42,
)

# Hold out 30% of the samples for the final evaluation; the split is seeded as well.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)


In [3]:
# Initialize an online learning model with logistic regression loss.
# random_state is pinned because SGD shuffles the samples on each update; without a
# seed the per-batch accuracies and the final test score change from run to run,
# even though the dataset and the train/test split are seeded.
# NOTE: per the scikit-learn docs, max_iter and warm_start only influence fit();
# incremental training through partial_fit() always runs a single epoch per call
# and keeps the current coefficients, so these two settings are inert on that path.
model = SGDClassifier(
    loss='log_loss',
    learning_rate='optimal',
    max_iter=1,
    warm_start=True,
    random_state=42,
)


In [5]:
# Train the model in small batches to simulate streaming.
# Each partial_fit call applies one incremental update using a single batch, so the
# model is never fitted on the whole training set in one go.
batch_size = 100
n_batches = int(np.ceil(len(X_train) / batch_size))

# partial_fit must be told the complete label set on its first call. The value is
# loop-invariant, so it is computed once here instead of on every iteration.
classes = np.unique(y)

print("Training with online batches...\n")
for i, start in enumerate(range(0, len(X_train), batch_size)):
    # Slicing past the end is safe: the last batch is simply shorter.
    X_batch = X_train[start:start + batch_size]
    y_batch = y_train[start:start + batch_size]
    model.partial_fit(X_batch, y_batch, classes=classes)  # Online update
    # NOTE: the score is taken on the batch that was just used for the update, so it
    # is a training-batch accuracy (optimistic), not a held-out estimate.
    acc = model.score(X_batch, y_batch)
    print(f"Batch {i+1}/{n_batches} Accuracy: {acc:.2f}")


Training with online batches...

Batch 1/7 Accuracy: 0.77
Batch 2/7 Accuracy: 0.82
Batch 3/7 Accuracy: 0.89
Batch 4/7 Accuracy: 0.87
Batch 5/7 Accuracy: 0.77
Batch 6/7 Accuracy: 0.84
Batch 7/7 Accuracy: 0.86


In [6]:
# Score the trained model on the held-out split, which was never used for updates.
y_pred = model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)

# Header line followed by the accuracy rounded to two decimals.
print(
    "\nFinal Evaluation on Test Data:",
    f"Accuracy: {test_accuracy:.2f}",
    sep="\n",
)


Final Evaluation on Test Data:
Accuracy: 0.82
