In [3]:
# Gerekli kütüphaneleri yükleyelim
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score, roc_auc_score, confusion_matrix

# ---------------------- 1. Veri Seti Yükleme ve Sütun Adları ----------------------
columns = [
    "id", "diagnosis",
    "radius_mean", "texture_mean", "perimeter_mean", "area_mean", "smoothness_mean",
    "compactness_mean", "concavity_mean", "concave_points_mean", "symmetry_mean", "fractal_dimension_mean",
    "radius_se", "texture_se", "perimeter_se", "area_se", "smoothness_se", "compactness_se",
    "concavity_se", "concave_points_se", "symmetry_se", "fractal_dimension_se",
    "radius_worst", "texture_worst", "perimeter_worst", "area_worst", "smoothness_worst",
    "compactness_worst", "concavity_worst", "concave_points_worst", "symmetry_worst", "fractal_dimension_worst"
]

df = pd.read_csv("wdbc.csv", names=columns, header=None)

# ---------------------- 2. Etiket Kodlama ----------------------
df["diagnosis"] = df["diagnosis"].map({'B': 0, 'M': 1})

# ---------------------- 3. Redundant Öznitelikleri Çıkar ----------------------

def find_highly_correlated_features(dataframe, threshold=0.9):
    """
    Bu fonksiyon, birbirleriyle yüksek korelasyona sahip olan öznitelikleri bulur ve döndürür.
    Parametre: threshold (float): Korelasyon eşiği.
    """
    corr_matrix = dataframe.corr().abs()
    upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
    to_drop = [column for column in upper.columns if any(upper[column] > threshold)]
    return to_drop

# Örnek kullanım (df'de sayısal öznitelikler olmalı)
numerical_df = df.iloc[:, 2:]  # ID ve hedef hariç sayısal sütunlar
redundant_features = find_highly_correlated_features(numerical_df, threshold=0.9)

# ---------------------- 4. Giriş ve hedef değişkeni ayır ----------------------

X = df.drop("diagnosis", axis=1)
y = df["diagnosis"]

# ---------------------- 5. Veriyi Ölçeklendir ----------------------

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# ---------------------- 6. Eğitim ve test kümeleri ----------------------

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# ---------------------- 7. SVM modelini oluştur ve eğit ----------------------

model = SVC(probability=True, kernel='linear')  # Kernel olarak 'linear' seçildi, istenirse başka bir kernel de seçilebilir
model.fit(X_train, y_train)

# ---------------------- 8. Test verisi üzerinden tahmin yapalım ----------------------

y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)[:, 1]

# ---------------------- 9. Model Performansını Değerlendirme ----------------------

print("✅ Doğruluk (Accuracy):", accuracy_score(y_test, y_pred))
print("🎯 ROC AUC:", roc_auc_score(y_test, y_proba))
print("\n📋 Sınıflandırma Raporu:")
print(classification_report(y_test, y_pred, target_names=["Benign", "Malignant"]))


✅ Doğruluk (Accuracy): 0.956140350877193
🎯 ROC AUC: 0.9963969865705863

📋 Sınıflandırma Raporu:
              precision    recall  f1-score   support

      Benign       0.97      0.96      0.96        71
   Malignant       0.93      0.95      0.94        43

    accuracy                           0.96       114
   macro avg       0.95      0.96      0.95       114
weighted avg       0.96      0.96      0.96       114



In [1]:
# Gerekli kütüphaneleri yükleyelim
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score, roc_auc_score
from imblearn.over_sampling import SMOTE

In [2]:
# ---------------------- 1. Veri Seti Yükleme ve Sütun Adları ----------------------
columns = [
    "id", "diagnosis",
    "radius_mean", "texture_mean", "perimeter_mean", "area_mean", "smoothness_mean",
    "compactness_mean", "concavity_mean", "concave_points_mean", "symmetry_mean", "fractal_dimension_mean",
    "radius_se", "texture_se", "perimeter_se", "area_se", "smoothness_se", "compactness_se",
    "concavity_se", "concave_points_se", "symmetry_se", "fractal_dimension_se",
    "radius_worst", "texture_worst", "perimeter_worst", "area_worst", "smoothness_worst",
    "compactness_worst", "concavity_worst", "concave_points_worst", "symmetry_worst", "fractal_dimension_worst"
]

df = pd.read_csv("wdbc.csv", names=columns, header=None)


In [3]:
# ---------------------- 2. Etiket Kodlama ----------------------
df["diagnosis"] = df["diagnosis"].map({'B': 0, 'M': 1})

# ---------------------- 3. Redundant Öznitelikleri Çıkar ----------------------

def kolerasyon_kaldir(dataframe, threshold=0.9):
    corr_matrix = dataframe.corr().abs()
    upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
    to_drop = [column for column in upper.columns if any(upper[column] > threshold)]
    return to_drop

numerical_df = df.iloc[:, 2:]  # ID ve hedef hariç sayısal sütunlar
redundant_features = kolerasyon_kaldir(numerical_df, threshold=0.9)


In [4]:
# ---------------------- 4. Giriş ve hedef değişkeni ayır ----------------------

X = df.drop("diagnosis", axis=1)
y = df["diagnosis"]

# ---------------------- 5. Veriyi Ölçeklendir ----------------------

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# ---------------------- 6. Eğitim ve test kümeleri ----------------------

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


In [6]:
# ---------------------- 7. SMOTE ile Dengeleme ----------------------

smote = SMOTE(random_state=42)
X_train_res, y_train_res = smote.fit_resample(X_train, y_train)


In [15]:
# ---------------------- 8. SVM Modeli için GridSearchCV ----------------------

# Parametre gridini tanımlama
param_grid = {
    'C': [0.1, 1, 10, 100], 
    'gamma': [0.001, 0.01, 0.1, 1], 
    'kernel': ['linear', 'rbf','poly','sigmoid'] 
}

# GridSearchCV ile en iyi parametreler bulunur.
grid_search = GridSearchCV(SVC(probability=True), param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train_res, y_train_res)

In [16]:

# ---------------------- 9. En İyi Parametrelerle Modeli Eğit ----------------------

best_model = grid_search.best_estimator_


In [17]:
# ---------------------- 10. Test verisi üzerinde tahmin yapalım ----------------------

y_pred = best_model.predict(X_test)
y_proba = best_model.predict_proba(X_test)[:, 1]

# ---------------------- 11. Model Performansını Değerlendirme ----------------------

print("✅ Doğruluk (Accuracy):", accuracy_score(y_test, y_pred))
print("🎯 ROC AUC:", roc_auc_score(y_test, y_proba))
print("\n📋 Sınıflandırma Raporu:")
print(classification_report(y_test, y_pred, target_names=["Benign", "Malignant"]))

# ---------------------- 12. En İyi Parametreleri Göster ----------------------
print("En İyi Parametreler: ", grid_search.best_params_)


✅ Doğruluk (Accuracy): 0.9824561403508771
🎯 ROC AUC: 0.996069439895185

📋 Sınıflandırma Raporu:
              precision    recall  f1-score   support

      Benign       0.97      1.00      0.99        71
   Malignant       1.00      0.95      0.98        43

    accuracy                           0.98       114
   macro avg       0.99      0.98      0.98       114
weighted avg       0.98      0.98      0.98       114

En İyi Parametreler:  {'C': 10, 'gamma': 0.01, 'kernel': 'rbf'}
