In [15]:
import numpy as np


def aci(data, R, L):
    """Turn a 1-D sequence into a series of quantised turning angles.

    Every sample is paired with its 1-based position, giving 2-D points
    ``(position, value)``. For each centre position ``i`` from ``1 + L``
    to ``N - R`` (inclusive) a window of ``L + R + 1`` consecutive points
    is sliced out, and the signed angle between the vectors
    ``p0 - p1`` and ``p1 - p2`` (the first three points of that window)
    is computed as ``arctan2(det, dot)``. The angle is converted to
    degrees, floored, and shifted by 180.

    NOTE(review): only the first three rows of each window are read, so
    ``R`` currently influences only how many windows are produced, not
    which points form the angle -- confirm this is intended.

    Parameters
    ----------
    data : sequence of numbers
        Input samples; ``len(data)`` is taken as ``N``.
    R : int
        Number of samples to the right of the centre included in a window.
    L : int
        Number of samples to the left of the centre included in a window.

    Returns
    -------
    numpy.ndarray
        One integer per window. Because ``arctan2`` yields angles in
        ``[-pi, pi]``, the values lie in ``0..360``. The array is empty
        when no window fits.

    Raises
    ------
    IndexError
        If a window holds fewer than three points (``L + R < 2``) and at
        least one window is evaluated.
    """
    n_samples = len(data)
    # Column 0: 1-based sample position, column 1: sample value.
    indexed = np.column_stack((np.arange(1, n_samples + 1), data))

    def quantised_angle(centre):
        window = indexed[centre - L - 1:centre + R, :].astype(float)
        first, second, third = window[0, :], window[1, :], window[2, :]
        incoming = first - second
        outgoing = second - third
        # The 2x2 determinant gives the signed sine component, the dot
        # product the cosine component of the angle between the vectors.
        angle = np.arctan2(np.linalg.det([incoming, outgoing]),
                           np.dot(incoming, outgoing))
        return 180 + int(np.floor(angle * 180 / np.pi))

    return np.array([quantised_angle(centre)
                     for centre in range(1 + L, n_samples - R + 1)])

# Example usage: run aci on a short sequence with a window of
# one sample to the left and two samples to the right.
data_example = [13, 256, 3, 44, 5, 65, 7, 8, 9]
R_example, L_example = 2, 1
result = aci(data_example, R=R_example, L=L_example)
print(result)


[  0 358   2 357   1 314]


In [20]:
import numpy as np
from collections import Counter


# Window sizes handed to aci (samples to the left / right of the centre).
L = 2
R = 2

# Length of the per-text histogram: aci values are used directly as indices
# into it, so every row has the same number of columns.
max_length = 360

# D1.txt: one text per line. D2.txt: one label per line, matched to D1.txt
# by line number.
with open('spam/D1.txt', 'r') as file:
    A = file.readlines()

with open('spam/D2.txt', 'r') as file:
    B = file.readlines()

features = []

for i in range(len(A)):
    dosya = A[i].strip()
    # Code point of every character. Stored directly as `veri` rather than in
    # a variable called `bytes`, which would shadow the builtin for every
    # later cell of the notebook.
    veri = [ord(char) for char in dosya]

    # Only texts longer than 10 characters are turned into features.
    if len(veri) <= 10:
        continue

    # aci is declared as aci(data, R, L); pass the window sizes by keyword so
    # they cannot be swapped (the previous positional call passed L as R and
    # R as L, which only worked because both are 2).
    b = aci(veri, R=R, L=L)
    count = dict(Counter(b))

    # 'ham' -> 1, anything else -> 0.
    label = 1 if B[i].strip() == 'ham' else 0

    # Histogram of the aci values: slot k holds how often value k occurred.
    padded_count = np.zeros(max_length)
    padded_count[list(count.keys())] = list(count.values())

    # One row per text: the histogram followed by the label in the last column.
    features.append(np.concatenate((padded_count, [label])))

# Convert the list of rows to a 2-D NumPy array
features = np.array(features)

# Print or use the 'features' array as needed
print(features.shape)


(1233, 361)


In [21]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt


# Separate the input features (every column except the last) from the
# target column (the last one).
X = features[:, :-1]
y = features[:, -1]

# Split the data into training and test sets (30% held out for testing).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# KNN classifier: fit, predict on the test set, then report accuracy and
# the confusion matrix.
knn_classifier = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
knn_predictions = knn_classifier.predict(X_test)
knn_accuracy = accuracy_score(y_test, knn_predictions)
knn_conf_matrix = confusion_matrix(y_test, knn_predictions)
print("KNN Başarı:", knn_accuracy)
print("KNN Karışıklık Matrisi:")
print(knn_conf_matrix)

# Naive Bayes classifier: same procedure as above.
nb_classifier = GaussianNB().fit(X_train, y_train)
nb_predictions = nb_classifier.predict(X_test)
nb_accuracy = accuracy_score(y_test, nb_predictions)
nb_conf_matrix = confusion_matrix(y_test, nb_predictions)
print("Naive Bayes Başarı", nb_accuracy)
print("Naive Bayes Karışıklık Matrisi:")
print(nb_conf_matrix)

KNN Başarı: 0.9027027027027027
KNN Karışıklık Matrisi:
[[ 48  36]
 [  0 286]]
Naive Bayes Başarı 0.8918918918918919
Naive Bayes Karışıklık Matrisi:
[[ 80   4]
 [ 36 250]]
