In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import joblib

# Training pipeline: load the dataset, clean it, split it into train/test,
# standardize the features, train an SGD classifier incrementally with
# partial_fit, report test-set metrics, and persist the fitted artifacts.

df_new = pd.read_csv(r"path dataset")

df_new.drop_duplicates(inplace=True)

# Validate the target column before any dtype filtering: a string-valued
# 'label' column has object dtype and would otherwise be discarded together
# with the non-numeric feature columns.
if 'label' not in df_new.columns:
    raise ValueError("العمود 'label' غير موجود في مجموعة البيانات بعد إعادة إضافته.")

# A row without a target cannot be used for training or evaluation.
# Imputing the label with a column mean would invent a class that does not
# exist, so such rows are dropped instead.
df_new = df_new.dropna(subset=['label'])

# Set the target aside so that dtype filtering and imputation only ever
# touch the feature columns.
label_column = df_new['label']
feature_frame = df_new.drop('label', axis=1).select_dtypes(exclude=['object'])

# Turn +/-inf into NaN first so the column means used for imputation are
# computed from finite values only, then fill every gap in one pass.
# NOTE: the means are computed over the whole dataset (before the split).
feature_frame = feature_frame.replace([np.inf, -np.inf], np.nan)
feature_frame = feature_frame.fillna(feature_frame.mean())

# A column that is entirely NaN has a NaN mean and cannot be imputed.
if feature_frame.isnull().values.any():
    raise ValueError("البيانات تحتوي على قيم مفقودة بعد المعالجة.")

# Re-attach the target (aligned on the row index) so the rest of the script
# can keep working with a single frame that contains 'label'.
df_new = feature_frame.assign(label=label_column)

df_train_new, df_test_new = train_test_split(df_new, test_size=0.20, random_state=42)

X_df_train_new = df_train_new.drop('label', axis=1)
Y_df_train_new = df_train_new['label']
X_df_test_new = df_test_new.drop('label', axis=1)
Y_df_test_new = df_test_new['label']

# Fit the scaler on the training split only; the test split is transformed
# with the training statistics.
scaler = StandardScaler()
X_df_train_new = scaler.fit_transform(X_df_train_new)
X_df_test_new = scaler.transform(X_df_test_new)

# random_state fixes the per-batch shuffling done inside partial_fit, so
# repeated runs give the same model (the split above is already seeded).
clf = SGDClassifier(alpha=0.001, penalty='elasticnet', l1_ratio=0.15, random_state=42)

# partial_fit must be told the complete set of classes, because a single
# mini-batch is not guaranteed to contain all of them.
classes = np.unique(Y_df_train_new)

batch_size = 100
n_train_samples = X_df_train_new.shape[0]
n_batches = int(np.ceil(n_train_samples / batch_size))

# Single pass over the training data in consecutive mini-batches.
for i in range(n_batches):
    start_idx = i * batch_size
    end_idx = min(start_idx + batch_size, n_train_samples)
    X_batch = X_df_train_new[start_idx:end_idx]
    y_batch = Y_df_train_new.iloc[start_idx:end_idx]

    clf.partial_fit(X_batch, y_batch, classes=classes)

Y_pred_new = clf.predict(X_df_test_new)

# Weighted averaging weights each class's score by its support in the
# test split.
accuracy = accuracy_score(Y_df_test_new, Y_pred_new)
precision = precision_score(Y_df_test_new, Y_pred_new, average='weighted')
recall = recall_score(Y_df_test_new, Y_pred_new, average='weighted')
f1 = f1_score(Y_df_test_new, Y_pred_new, average='weighted')

conf_matrix = confusion_matrix(Y_df_test_new, Y_pred_new)
print("Confusion Matrix:")
print(conf_matrix)
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
label_counts = Y_df_test_new.value_counts()
print("Number of samples for each label in the test data:")
print(label_counts)


joblib.dump(clf, 'partial_fitIoT2023.pkl')
print("The model has been saved successfully as 'partial_fitIoT2023.pkl'.")

# The classifier was trained on standardized features, so the fitted scaler
# is required to prepare new data at inference time; persist it alongside.
joblib.dump(scaler, 'partial_fitIoT2023_scaler.pkl')
print("The fitted scaler has been saved as 'partial_fitIoT2023_scaler.pkl'.")
