In [2]:
# 📦 Import Common Libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# 📂 Load the Dataset
data = pd.read_csv('social_media_test.csv')

# 🎯 Separate Features and Labels
X = data.drop('label', axis=1)
y = data['label']

# 🔠 Define Categorical Columns for One-Hot Encoding
categorical_cols = ['profile_pic', 'sim_name_username', 'len_desc', 'extern_url', 'private']

# 🧼 Preprocessing: OneHotEncoder for Categorical Columns
preprocessor = ColumnTransformer(
    transformers=[('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)],
    remainder='passthrough'
)

# 🧪 Split Data into Train/Test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)



In [3]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import precision_score, recall_score, f1_score

# 🔁 Define Pipeline
gb_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42))
])

# 🧠 Train & Evaluate
gb_pipeline.fit(X_train, y_train)
gb_preds = gb_pipeline.predict(X_test)

# 📊 Evaluate Metrics
precision = precision_score(y_test, gb_preds)
recall = recall_score(y_test, gb_preds)
f1 = f1_score(y_test, gb_preds)
print(f"Gradient Boosting - Precision: {precision}, Recall: {recall}, F1 Score: {f1}")


Gradient Boosting - Precision: 1.0, Recall: 1.0, F1 Score: 1.0


In [4]:
from sklearn.ensemble import RandomForestClassifier

rf_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(n_estimators=100, max_depth=2, random_state=42))
])

rf_pipeline.fit(X_train, y_train)
rf_preds = rf_pipeline.predict(X_test)

rf_accuracy = (rf_preds == y_test).mean()
print(f"Random Forest - Accuracy: {rf_accuracy}")


Random Forest - Accuracy: 1.0


In [5]:
from sklearn.naive_bayes import MultinomialNB

nb_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', MultinomialNB())
])

nb_pipeline.fit(X_train, y_train)
nb_preds = nb_pipeline.predict(X_test)

nb_accuracy = (nb_preds == y_test).mean()
print(f"Naive Bayes - Accuracy: {nb_accuracy}")


Naive Bayes - Accuracy: 0.5833333333333334


In [6]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

ada_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', AdaBoostClassifier(
        estimator=DecisionTreeClassifier(max_depth=1),
        n_estimators=50,
        random_state=42
    ))
])

ada_pipeline.fit(X_train, y_train)
ada_preds = ada_pipeline.predict(X_test)

ada_accuracy = (ada_preds == y_test).mean()
print(f"AdaBoost - Accuracy: {ada_accuracy}")


AdaBoost - Accuracy: 1.0


In [7]:
from sklearn.svm import SVC

svm_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', SVC(probability=True, kernel='linear'))
])

svm_pipeline.fit(X_train, y_train)
svm_preds = svm_pipeline.predict(X_test)

svm_accuracy = (svm_preds == y_test).mean()
print(f"SVM - Accuracy: {svm_accuracy}")


SVM - Accuracy: 1.0


In [8]:
from xgboost import XGBClassifier

xgb_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', XGBClassifier(learning_rate=0.1, n_estimators=100, max_depth=3, random_state=42))
])

xgb_pipeline.fit(X_train, y_train)
xgb_preds = xgb_pipeline.predict(X_test)

xgb_accuracy = (xgb_preds == y_test).mean()
print(f"XGBoost - Accuracy: {xgb_accuracy}")


XGBoost - Accuracy: 0.9583333333333334


In [9]:
results = {
    "Model": ["Gradient Boosting", "Random Forest", "Naive Bayes", "AdaBoost", "SVM", "XGBoost"],
    "Accuracy": [None, rf_accuracy, nb_accuracy, ada_accuracy, svm_accuracy, xgb_accuracy],
    "Precision": [precision, None, None, None, None, None],
    "Recall": [recall, None, None, None, None, None],
    "F1 Score": [f1, None, None, None, None, None]
}

summary = pd.DataFrame(results)
print(summary)


               Model  Accuracy  Precision  Recall  F1 Score
0  Gradient Boosting       NaN        1.0     1.0       1.0
1      Random Forest  1.000000        NaN     NaN       NaN
2        Naive Bayes  0.583333        NaN     NaN       NaN
3           AdaBoost  1.000000        NaN     NaN       NaN
4                SVM  1.000000        NaN     NaN       NaN
5            XGBoost  0.958333        NaN     NaN       NaN
