# install

In [1]:
!pip install fasttext
!pip install sastrawi # indian language stem
!pip install transformers

Collecting fasttext
  Downloading fasttext-0.9.2.tar.gz (68 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m68.8/68.8 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pybind11>=2.2 (from fasttext)
  Using cached pybind11-2.11.1-py3-none-any.whl (227 kB)
Building wheels for collected packages: fasttext
  Building wheel for fasttext (setup.py) ... [?25l[?25hdone
  Created wheel for fasttext: filename=fasttext-0.9.2-cp310-cp310-linux_x86_64.whl size=4199773 sha256=8d70f2a4b778f4acd8301c3ce5af7a1f5e240d8488fc807fa58cf78403fa6c9e
  Stored in directory: /root/.cache/pip/wheels/a5/13/75/f811c84a8ab36eedbaef977a6a58a98990e8e0f1967f98f394
Successfully built fasttext
Installing collected packages: pybind11, fasttext
Successfully installed fasttext-0.9.2 pybind11-2.11.1
Collecting sastrawi
  Downloading Sastrawi-1.0.1-py2.py3-none-any.whl (209 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32

In [2]:
import pandas as pd
import numpy as np

from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory

import fasttext

from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

In [3]:
# download dataset https://www.kaggle.com/datasets/dodyagung/accident
!gdown 1AAN0R-vTkpx9Mwp6JprRqK6fxOFPL6oK

Downloading...
From: https://drive.google.com/uc?id=1AAN0R-vTkpx9Mwp6JprRqK6fxOFPL6oK
To: /content/traffic_twitter.zip
100% 329M/329M [00:06<00:00, 52.3MB/s]


In [4]:
# Download the pre-trained GloVe embeddings
!wget http://nlp.stanford.edu/data/glove.6B.zip
!unzip glove.6B.zip -d glove


--2023-08-07 21:39:31--  http://nlp.stanford.edu/data/glove.6B.zip
Resolving nlp.stanford.edu (nlp.stanford.edu)... 171.64.67.140
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:80... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://nlp.stanford.edu/data/glove.6B.zip [following]
--2023-08-07 21:39:31--  https://nlp.stanford.edu/data/glove.6B.zip
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://downloads.cs.stanford.edu/nlp/data/glove.6B.zip [following]
--2023-08-07 21:39:32--  https://downloads.cs.stanford.edu/nlp/data/glove.6B.zip
Resolving downloads.cs.stanford.edu (downloads.cs.stanford.edu)... 171.64.64.22
Connecting to downloads.cs.stanford.edu (downloads.cs.stanford.edu)|171.64.64.22|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 862182613 (822M) [application/zip]
Saving to: ‘glove.6B.zip’


202

In [5]:
!unzip traffic_twitter.zip

Archive:  traffic_twitter.zip
  inflating: twitter.csv             
  inflating: twitter_label_auto.csv  
  inflating: twitter_label_manual.csv  


In [6]:
data = pd.read_csv('twitter_label_manual.csv')

# preprocess

In [7]:
data.head()

Unnamed: 0,id_str,created_at,crawled_at,screen_name,is_accident,full_text
0,1113976371515146240,2019-04-05 08:27:18,2020-02-08 12:30:29,mrahmatr7,1,"Rekaman CCTV Kecelakaan Motor di PIK, depan Ta..."
1,1114010329313206272,2019-04-05 10:42:14,2020-02-08 12:30:43,PanritaNews,1,"Tewaskan 346 Orang dalam 2 Kecelakaan, Boss Bo..."
2,1114037478237855745,2019-04-05 12:30:07,2020-02-08 12:30:56,VICE_ID,0,Anggota parlemen Taiwan juga berencana meningk...
3,1114121169873932288,2019-04-05 18:02:40,2020-02-08 12:31:38,OfficialSyariah,0,C.Gerakan.bicara pertolongan pertama pada kece...
4,1114149728323682305,2019-04-05 19:56:09,2020-02-08 12:31:54,AhliAsuransi,0,Asuransi mana nih??\n\nhttps://t.co/AJyABmimcY...


In [8]:
data.shape

(1002, 6)

In [9]:
def text_preprocess(series, stemmer, stopwords):
    df = series.str.replace("\n\t",  " ")
    df = df.str.replace(r"[^a-zA-Z ]+", "")
    df = df.str.lower()
    df = df.apply(lambda x: ' '.join([stemmer.stem(item) for item in x.split() if item not in stopwords]))

    return df

In [10]:
stopwords = StopWordRemoverFactory().get_stop_words()
stemmer = StemmerFactory().create_stemmer()

In [11]:
data = data[:500].copy() # reduce dataset for performance

In [12]:
data['processed_text'] = text_preprocess(data['full_text'], stemmer, stopwords)

  df = df.str.replace(r"[^a-zA-Z ]+", "")


# model

fastext

In [13]:
# write the content to a text file, for fasttext model
with open('twitter.txt', 'w') as f:
    f.write('\n'.join(data['processed_text'].astype(str)))

In [14]:
model = fasttext.train_unsupervised('twitter.txt', model='skipgram', dim=100)

In [15]:
data['fasttext'] = data['processed_text'].apply(lambda x: model.get_sentence_vector(x))

word2vec

In [16]:
import pandas as pd
import numpy as np
from gensim.models import Word2Vec

# Define the Word2Vec vectorization function
def vectorize_text(text, w2v_model):
    words = text.split()
    vectors = [w2v_model.wv[word] for word in words if word in w2v_model.wv]
    if vectors:
        return np.mean(vectors, axis=0)
    else:
        return np.zeros(w2v_model.vector_size)

# Train a Word2Vec model on the preprocessed text
sentences = [text.split() for text in data['processed_text']]
w2v_model = Word2Vec(sentences, window=5, min_count=1, workers=4)

# Vectorize the preprocessed text using the Word2Vec model
data['word2vec'] = data['processed_text'].apply(lambda x: vectorize_text(x, w2v_model))

GloVe

In [17]:
# Load the GloVe embeddings into memory
glove_path = 'glove/glove.6B.100d.txt'
glove_embeddings = {}
with open(glove_path, 'r', encoding='utf8') as f:
    for line in f:
        values = line.split()
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        glove_embeddings[word] = coefs

# Define the GloVe vectorization function
def vectorize_text(text):
    tokens = text.split()
    vectors = []
    for token in tokens:
        if token in glove_embeddings:
            vectors.append(glove_embeddings[token])
    if len(vectors) > 0:
        return np.mean(vectors, axis=0)
    else:
        return np.zeros(100)  # return a vector of zeros if no valid tokens are found

# Vectorize the preprocessed text using GloVe
data['glove'] = data['processed_text'].apply(vectorize_text)

BERT

In [18]:
import torch
from transformers import AutoTokenizer, AutoModel

# Define the BERT vectorization function
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
model = AutoModel.from_pretrained('bert-base-uncased')

def vectorize_text(text):
    inputs = tokenizer(text, padding=True, truncation=True, max_length=128, return_tensors='pt')
    with torch.no_grad():
        outputs = model(**inputs)[0]
    return outputs.mean(dim=1).squeeze().numpy()


# Vectorize the preprocessed text using BERT
data['bert'] = data['processed_text'].apply(vectorize_text)


Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

In [19]:
data.head()
# data = data.drop('word2vec1', axis=1)

Unnamed: 0,id_str,created_at,crawled_at,screen_name,is_accident,full_text,processed_text,fasttext,word2vec,glove,bert
0,1113976371515146240,2019-04-05 08:27:18,2020-02-08 12:30:29,mrahmatr7,1,"Rekaman CCTV Kecelakaan Motor di PIK, depan Ta...",rekam cctv celaka motor pik depan taman grisen...,"[-0.013006344, 0.08466679, -0.057704847, -0.06...","[0.002368716, 0.00089951954, 0.0013855002, 0.0...","[0.063926205, 0.0043275952, 0.2179518, 0.10591...","[0.054724503, 0.25336766, 0.49611, 0.12220571,..."
1,1114010329313206272,2019-04-05 10:42:14,2020-02-08 12:30:43,PanritaNews,1,"Tewaskan 346 Orang dalam 2 Kecelakaan, Boss Bo...",tewas orang celaka boss boeing minta maaf http...,"[-0.027910735, 0.07607751, -0.049789697, -0.04...","[-0.0017239314, 0.003566381, 0.0016135403, -0....","[0.36248598, -0.16214201, 0.096906, -0.0685519...","[0.077111796, 0.44264176, -0.023823604, -0.026..."
2,1114037478237855745,2019-04-05 12:30:07,2020-02-08 12:30:56,VICE_ID,0,Anggota parlemen Taiwan juga berencana meningk...,anggota parlemen taiwan rencana tingkat denda ...,"[-0.019338382, 0.092050105, -0.06558499, -0.05...","[-0.0024091587, 0.0006969254, 0.0009730155, 0....","[-0.005034062, -0.27989158, -0.159691, 0.04854...","[-0.19983459, 0.090510204, -0.13769467, -0.060..."
3,1114121169873932288,2019-04-05 18:02:40,2020-02-08 12:31:38,OfficialSyariah,0,C.Gerakan.bicara pertolongan pertama pada kece...,cgerakanbicara tolong pertama celaka pkbakat h...,"[-0.030455967, 0.08570286, -0.08142066, -0.043...","[-0.0043439525, -2.413083e-05, 0.0017528838, 0...","[0.36396, -0.39613, -0.28717, 0.45257, -0.6475...","[-0.004425594, 0.26324335, 0.02596502, -0.1280..."
4,1114149728323682305,2019-04-05 19:56:09,2020-02-08 12:31:54,AhliAsuransi,0,Asuransi mana nih??\n\nhttps://t.co/AJyABmimcY...,asuransi mana nihhttpstcoajyabmimcyppatk beri ...,"[-0.017410709, 0.08686921, -0.07440367, -0.057...","[-0.0002699182, 0.00011225976, 0.0006368613, -...","[-0.05621981, -0.3753288, 0.12212621, -0.07811...","[-0.18520285, 0.2537315, 0.10447164, 0.0654399..."


In [20]:
data.shape

(500, 11)

In [21]:
models = (data['fasttext'], data['word2vec'], data['glove'], data['bert'])

# classify


In [22]:
data['is_accident']

0      1
1      1
2      0
3      0
4      0
      ..
495    0
496    0
497    1
498    1
499    0
Name: is_accident, Length: 500, dtype: int64

In [23]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier, VotingClassifier

In [24]:
# wlaczane dla MLP bo warningi przeszkadzaja w obserwacji wynikow - potem raczej wylaczyc
from warnings import filterwarnings
filterwarnings('ignore')
# filterwarnings('default')

In [25]:
def classifier(kind):
  if kind == 'RF':
    clf = RandomForestClassifier(n_estimators=50, random_state=1)

  if kind == 'KNN':
    clf = KNeighborsClassifier(n_neighbors=5)

  if kind == 'MLP':
    clf = MLPClassifier()

  return clf

def ensemble (ens_kind, clf_kind, n):
  if ens_kind == 'bagging':
    clf = classifier(clf_kind)
    ensembleClf = BaggingClassifier(clf, max_samples=0.7, n_estimators=n, max_features=0.7, random_state=42)    #        MLPClassifier(), KNeighborsClassifier(), None = drzewo decyzyjne
  if ens_kind == 'voting':
    clf1 = classifier("RF")
    clf2 = classifier("MLP")
    clf3 = classifier("KNN")
    if ('RF' and 'MLP') in clf_kind:
      ensembleClf = VotingClassifier(estimators=[(str(clf1), clf1), (str(clf2), clf2)], voting=n)
    elif ('RF' and 'KNN') in clf_kind:
      ensembleClf = VotingClassifier(estimators=[(str(clf1), clf1), (str(clf3), clf3)], voting=n)
    elif ('MLP' and 'KNN') in clf_kind:
      ensembleClf = VotingClassifier(estimators=[(str(clf2), clf2), (str(clf3), clf3)], voting=n)
    else:
      ensembleClf = VotingClassifier(estimators=[(str(clf1), clf1), (str(clf2), clf2), (str(clf3), clf3)], voting=n)

  return ensembleClf



In [26]:
# WYGODNE DO PYTAŃ 2 I 3
def klasyfikatory(ktory):

  kinds = ['RF', 'MLP', 'KNN']

  if ktory == 'w':
    wszystko = ['single_classifiers', 'bagging', 'voting']
    for i in wszystko:
      klasyfikatory(i)

  if ktory == 'single_classifiers':
    for k in kinds:
      clf = classifier(k)
      clf.fit(list(X_train), list(y_train))
      y_pred = clf.predict(list(X_test))
      ocena = evaluation(y_test, y_pred, sposob_ewaluacji)
      print(f"Single classifier: {k}, evaluation: {ocena}\n\n")

  if ktory == 'bagging':
    for k in kinds:
      ensembleClf = ensemble('bagging', k, 10)
      ensembleClf.fit(list(X_train), list(y_train))
      y_pred = ensembleClf.predict(list(X_test))
      ocena = evaluation(y_test, y_pred, sposob_ewaluacji)
      print(f"bagging of {k}, evaluation: {ocena}\n\n")

  if ktory == 'voting':
    voting_type = ['soft', 'hard']
    for v in voting_type:
      ensembleClf = ensemble('voting', kinds, v)
      ensembleClf.fit(list(X_train), list(y_train))
      y_pred = ensembleClf.predict(list(X_test))
      ocena = evaluation(y_test, y_pred, sposob_ewaluacji)
      print(f"{v} voting of {kinds}, evaluation: {ocena}\n\n")

EWALUACJA

In [27]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import RocCurveDisplay

In [28]:
def evaluation(test, pred, sposob):
  if sposob == 'w':
    # brzydko się wykonuje dla wszystkich bo podpis co to wgl jest wypisuje sie jako ostatni i nwm jak to zmienić, ale poza tym to działa
    wszystko = ['acc', 'cnf', 'rs', 'ps', 'roc']
    for i in wszystko:
      print(evaluation(test, pred, i))
  else:
    if sposob == 'acc':
      acc = accuracy_score(test, pred)
      return (f"Accuracy = {acc}")
    if sposob == 'cnf':
      cnf = confusion_matrix(test, pred)
      return (f"Confusion matrix:\n {cnf}")
    if sposob == 'rs':
      rs= recall_score(test, pred)
      return (f"Recall score = {rs}")
    if sposob == 'ps':
      ps = precision_score(test, pred)
      return (f"Precision score = {ps}")
    if sposob == 'roc':
      # dla tego trochę lepiej ale też brzydko
      RocCurveDisplay.from_predictions(
        test, pred)
      plt.show()
      return 'ROC curve & AUC\n'

**Question 1**

Do additional ensamble classifiers increase performance compared to a single classifier?

Solution characteristics:
1. Different combinations of classifiers:
* single: RF, KNN, MLP
* ensemble bagging (different amounts of estimators per each classifier type separately)
* ensemble voting (all classifier types together and 3 different pairs; hard and soft voting)
2. FastText as vectorization model (might be changed)
3. Accuracy as evaluation method (might be changed)

In [29]:
vectorization_type = models[0]
sposob_ewaluacji= 'acc'

In [30]:
X_train, X_test, y_train, y_test = train_test_split(vectorization_type, data['is_accident'], test_size=0.2, random_state=42)

In [31]:
# single classifiers
kinds = ['RF', 'MLP', 'KNN']
for k in kinds:
  clf = classifier(k)
  clf.fit(list(X_train), list(y_train))
  y_pred = clf.predict(list(X_test))
  ocena = evaluation(y_test, y_pred, sposob_ewaluacji)
  print(f"Single classifier: {k}, evaluation: {ocena}\n\n")

Single classifier: RF, evaluation: Accuracy = 0.62


Single classifier: MLP, evaluation: Accuracy = 0.58


Single classifier: KNN, evaluation: Accuracy = 0.73




In [32]:
# bagging
# n_est_list = [1, 5, 10, 20, 50, 80, 100, 150, 200, 250, 300, 350]
n_est_list = [5, 100, 200]
for k in kinds:
  for i in n_est_list:
    ensembleClf = ensemble('bagging', k, i)
    ensembleClf.fit(list(X_train), list(y_train))
    y_pred = ensembleClf.predict(list(X_test))
    ocena = evaluation(y_test, y_pred, sposob_ewaluacji)
    print(f"bagging of {k}, n_estimators = {i}, evaluation: {ocena}")

bagging of RF, n_estimators = 5, evaluation: Accuracy = 0.64
bagging of RF, n_estimators = 100, evaluation: Accuracy = 0.63
bagging of RF, n_estimators = 200, evaluation: Accuracy = 0.64
bagging of MLP, n_estimators = 5, evaluation: Accuracy = 0.58
bagging of MLP, n_estimators = 100, evaluation: Accuracy = 0.59
bagging of MLP, n_estimators = 200, evaluation: Accuracy = 0.59
bagging of KNN, n_estimators = 5, evaluation: Accuracy = 0.59
bagging of KNN, n_estimators = 100, evaluation: Accuracy = 0.65
bagging of KNN, n_estimators = 200, evaluation: Accuracy = 0.7


In [33]:
# voting
clf_combinations = [['RF', 'KNN', 'MLP'], ['RF', 'KNN'], ['RF', 'MLP'], ['KNN', 'MLP']]
voting_type = ['soft', 'hard']
for c in clf_combinations:
  for v in voting_type:
    ensembleClf = ensemble('voting', c, v)
    ensembleClf.fit(list(X_train), list(y_train))
    y_pred = ensembleClf.predict(list(X_test))
    ocena = evaluation(y_test, y_pred, sposob_ewaluacji)
    print(f"{v} voting of {c}, evaluation: {ocena}")

soft voting of ['RF', 'KNN', 'MLP'], evaluation: Accuracy = 0.64
hard voting of ['RF', 'KNN', 'MLP'], evaluation: Accuracy = 0.58
soft voting of ['RF', 'KNN'], evaluation: Accuracy = 0.71
hard voting of ['RF', 'KNN'], evaluation: Accuracy = 0.66
soft voting of ['RF', 'MLP'], evaluation: Accuracy = 0.63
hard voting of ['RF', 'MLP'], evaluation: Accuracy = 0.56
soft voting of ['KNN', 'MLP'], evaluation: Accuracy = 0.61
hard voting of ['KNN', 'MLP'], evaluation: Accuracy = 0.58


**Question 2**

Does PCA feature reduction improve the performance of classifiers?

Solution characteristics
1. Classification with and without PCA
2. Both clasifications compared for single and ensemble classifiers:
* single: RF, KNN, MLP
* ensemble bagging (1 combination - 10 estimators per each classifier type separately)
* ensemble voting (1 combination - all classifier types together; hard and soft voting)
3. FastText as vectorization model (might be changed)
4. Accuracy as evaluation method (might be changed)

In [34]:
vectorization_type = models[0]
sposob_ewaluacji= 'acc'
jakie_klasyfikatory_do_porownania = 'w'

In [35]:
X = vectorization_type

print("No PCA:")
X_train, X_test, y_train, y_test = train_test_split(X, data['is_accident'], test_size=0.2, random_state=43)
klasyfikatory(jakie_klasyfikatory_do_porownania)

print("\nPCA:")
pca_input = X.tolist()   # wejscie do PCA to array wiec jesli X to nie array, to zrob array/liste
pca = PCA(n_components=50)

pca_embeddings  = pca.fit_transform(X.tolist())

X_train, X_test, y_train, y_test = train_test_split(pca_embeddings, data['is_accident'], test_size=0.2, random_state=42)
klasyfikatory(jakie_klasyfikatory_do_porownania)


No PCA:
Single classifier: RF, evaluation: Accuracy = 0.65


Single classifier: MLP, evaluation: Accuracy = 0.6


Single classifier: KNN, evaluation: Accuracy = 0.71


bagging of RF, evaluation: Accuracy = 0.65


bagging of MLP, evaluation: Accuracy = 0.61


bagging of KNN, evaluation: Accuracy = 0.65


soft voting of ['RF', 'MLP', 'KNN'], evaluation: Accuracy = 0.59


hard voting of ['RF', 'MLP', 'KNN'], evaluation: Accuracy = 0.63



PCA:
Single classifier: RF, evaluation: Accuracy = 0.69


Single classifier: MLP, evaluation: Accuracy = 0.6


Single classifier: KNN, evaluation: Accuracy = 0.66


bagging of RF, evaluation: Accuracy = 0.71


bagging of MLP, evaluation: Accuracy = 0.58


bagging of KNN, evaluation: Accuracy = 0.67


soft voting of ['RF', 'MLP', 'KNN'], evaluation: Accuracy = 0.64


hard voting of ['RF', 'MLP', 'KNN'], evaluation: Accuracy = 0.62




**Question 3**

How does FastText do comparing to other vectorization models?

Solution characteristics
1. 4 different models for text vectorization:
* FastText
* Word2Vec
* GLOVE
* BERT
2. Comparison of single and ensemble classifiers:
* single: RF, KNN, MLP
* ensemble bagging (1 combination - 10 estimators per each classifier type separately)
* ensemble voting (1 combination - all classifier types together; hard and soft voting)
3. Accuracy as evaluation method (might be changed)

In [36]:
sposob_ewaluacji = 'acc'
jakie_klasyfikatory_do_porownania = 'w'

In [37]:
for j in models:
  vectorization_type = j
  print(f"\nVectorization model: {j.name}")
  X_train, X_test, y_train, y_test = train_test_split(vectorization_type, data['is_accident'], test_size=0.2, random_state=42)

  klasyfikatory(jakie_klasyfikatory_do_porownania)


Vectorization model: fasttext
Single classifier: RF, evaluation: Accuracy = 0.62


Single classifier: MLP, evaluation: Accuracy = 0.58


Single classifier: KNN, evaluation: Accuracy = 0.73


bagging of RF, evaluation: Accuracy = 0.64


bagging of MLP, evaluation: Accuracy = 0.58


bagging of KNN, evaluation: Accuracy = 0.68


soft voting of ['RF', 'MLP', 'KNN'], evaluation: Accuracy = 0.63


hard voting of ['RF', 'MLP', 'KNN'], evaluation: Accuracy = 0.58



Vectorization model: word2vec
Single classifier: RF, evaluation: Accuracy = 0.76


Single classifier: MLP, evaluation: Accuracy = 0.57


Single classifier: KNN, evaluation: Accuracy = 0.74


bagging of RF, evaluation: Accuracy = 0.72


bagging of MLP, evaluation: Accuracy = 0.58


bagging of KNN, evaluation: Accuracy = 0.72


soft voting of ['RF', 'MLP', 'KNN'], evaluation: Accuracy = 0.73


hard voting of ['RF', 'MLP', 'KNN'], evaluation: Accuracy = 0.59



Vectorization model: glove
Single classifier: RF, evaluation: Accuracy = 