In [None]:
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# --- Data --------------------------------------------------------------
# Synthetic binary classification problem: 1000 rows and 20 columns, of
# which 10 are informative and 5 are redundant. Seeded for repeatability.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=10,
    n_redundant=5,
    n_classes=2,
    random_state=42,
)

# Hold out 20% of the rows for evaluation (seeded split).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- Preprocessing -----------------------------------------------------
# The scaler is fitted on the training split only and then applied to both
# splits, so the test rows never influence the scaling statistics.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# --- Models ------------------------------------------------------------
# Both estimators are seeded and trained on the same scaled training data.
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    X_train_scaled, y_train
)
lr = LogisticRegression(max_iter=1000, random_state=42).fit(
    X_train_scaled, y_train
)

# --- Evaluation --------------------------------------------------------
# Predict on the held-out rows, then score each model's predictions
# against the true test labels.
rf_preds, lr_preds = (model.predict(X_test_scaled) for model in (rf, lr))
rf_acc, lr_acc = (
    accuracy_score(y_test, preds) for preds in (rf_preds, lr_preds)
)

# --- Report ------------------------------------------------------------
print("Random Forest Accuracy:", rf_acc)
print("Logistic Regression Accuracy:", lr_acc)

# Announce the winner; an exact tie falls through to the last message.
print(
    "Random Forest performed better."
    if rf_acc > lr_acc
    else "Logistic Regression performed better."
    if lr_acc > rf_acc
    else "Both classifiers performed equally."
)


Random Forest Accuracy: 0.915
Logistic Regression Accuracy: 0.8
Random Forest performed better.
