<a href="https://colab.research.google.com/github/yoreva-01/proje/blob/main/Untitled19.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd

# Location of the uploaded dataset file; change it if the upload has another name.
file_path = '/content/KDDTrain+.txt'

# NSL-KDD column names in file order: 41 features, followed by the class label
# and the difficulty column (43 names in total).
column_names = [
    "duration", "protocol_type", "service", "flag", "src_bytes", "dst_bytes",
    "land", "wrong_fragment", "urgent", "hot", "num_failed_logins", "logged_in",
    "num_compromised", "root_shell", "su_attempted", "num_root",
    "num_file_creations", "num_shells", "num_access_files", "num_outbound_cmds",
    "is_host_login", "is_guest_login", "count", "srv_count", "serror_rate",
    "srv_serror_rate", "rerror_rate", "srv_rerror_rate", "same_srv_rate",
    "diff_srv_rate", "srv_diff_host_rate", "dst_host_count", "dst_host_srv_count",
    "dst_host_same_srv_rate", "dst_host_diff_srv_rate",
    "dst_host_same_src_port_rate", "dst_host_srv_diff_host_rate",
    "dst_host_serror_rate", "dst_host_srv_serror_rate", "dst_host_rerror_rate",
    "dst_host_srv_rerror_rate", "label", "difficulty",
]

# Read the file using the names above (it is read without a header row) and
# discard the 'difficulty' column straight away.
df = pd.read_csv(file_path, names=column_names).drop(columns='difficulty')

# Show the first few rows as a sanity check.
df.head()


Unnamed: 0,duration,protocol_type,service,flag,src_bytes,dst_bytes,land,wrong_fragment,urgent,hot,...,dst_host_srv_count,dst_host_same_srv_rate,dst_host_diff_srv_rate,dst_host_same_src_port_rate,dst_host_srv_diff_host_rate,dst_host_serror_rate,dst_host_srv_serror_rate,dst_host_rerror_rate,dst_host_srv_rerror_rate,label
0,0,tcp,http,SF,181,5450,0,0,0,0,...,9,1.0,0.0,0.11,0.0,0.0,0.0,0.0,0.0,normal.
1,0,tcp,http,SF,239,486,0,0,0,0,...,19,1.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,normal.
2,0,tcp,http,SF,235,1337,0,0,0,0,...,29,1.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,normal.
3,0,tcp,http,SF,219,1337,0,0,0,0,...,39,1.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,normal.
4,0,tcp,http,SF,217,2032,0,0,0,0,...,49,1.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,normal.


In [3]:
from sklearn.preprocessing import LabelEncoder

# Clean and binarise the labels in one pass: strip the '.' characters around
# values such as 'normal.', then map 'normal' to 0 and every other label to 1.
# NOTE(review): this overwrites df['label'] with integers, so the cell cannot be
# re-run on its own (.str needs the original string labels); re-run the loading
# cell first.
df['label'] = df['label'].str.strip('.').ne('normal').astype('int64')

# One-hot encode the categorical feature columns.
categorical_cols = ['protocol_type', 'service', 'flag']
df = pd.get_dummies(data=df, columns=categorical_cols)

# Separate the target vector (y) from the feature matrix (X).
y = df['label']
X = df.drop(columns='label')

# Final check: feature-matrix shape and number of rows per class.
print("Veri boyutu:", X.shape)
print("Saldırı oranı:\n", y.value_counts())


Veri boyutu: (494021, 118)
Saldırı oranı:
 label
1    396743
0     97278
Name: count, dtype: int64


In [4]:
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; stratify on y so both splits keep the
# same class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Balance the classes with SMOTE, on the training split only (the test split
# is left untouched).
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

# Report the class distribution after resampling.
print("Yeni sınıf dağılımı:", y_train_resampled.value_counts(), sep="\n")


Yeni sınıf dağılımı:
label
1    317394
0    317394
Name: count, dtype: int64


In [5]:
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Define and train the model on the resampled training data.
# `use_label_encoder` is intentionally not passed: the XGBoost version this
# notebook ran on reports it as an unused parameter, so it only produced a
# warning and had no effect on training.
xgb = XGBClassifier(eval_metric='logloss', random_state=42)
xgb.fit(X_train_resampled, y_train_resampled)

# Predict on the held-out test split.
y_pred = xgb.predict(X_test)

# Performance metrics: confusion matrix plus per-class precision/recall/F1
# (rows of the matrix are true classes, columns are predicted classes).
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred, digits=4))


Parameters: { "use_label_encoder" } are not used.



Confusion Matrix:
 [[19449     7]
 [   14 79335]]

Classification Report:
               precision    recall  f1-score   support

           0     0.9993    0.9996    0.9995     19456
           1     0.9999    0.9998    0.9999     79349

    accuracy                         0.9998     98805
   macro avg     0.9996    0.9997    0.9997     98805
weighted avg     0.9998    0.9998    0.9998     98805



In [6]:
import joblib
# Persist the trained classifier to disk; the returned list of written file
# names is displayed as the cell output.
joblib.dump(value=xgb, filename="model_v1.pkl")


['model_v1.pkl']

In [7]:
import joblib
# Save the model.
# NOTE(review): this repeats the identical joblib.dump call made in the
# previous cell and simply overwrites the same file.
joblib.dump(
    xgb,
    "model_v1.pkl",
)


['model_v1.pkl']

In [8]:
import joblib

# Write the trained model to 'model_v1.pkl', then print a confirmation message.
joblib.dump(value=xgb, filename='model_v1.pkl')
print("✅ Model başarıyla model_v1.pkl olarak kaydedildi.")


✅ Model başarıyla model_v1.pkl olarak kaydedildi.


In [9]:
from google.colab import files

# Ask Colab to download the saved model file through the browser.
files.download(filename='model_v1.pkl')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>