# Random Forest 
### 150 samples, 3 species, from the OpenML Iris dataset
### Key ideas: bootstrapping and aggregating (bagging); the randomness comes from bootstrap sampling and random feature selection


In [7]:
import arff
import pandas as pd

# Parse the ARFF file; the handle is closed as soon as parsing finishes.
with open('dataset_61_iris.arff') as arff_file:
    data = arff.load(arff_file)

# Build the frame: the first element of each attribute entry is used as the column name.
column_names = [attribute[0] for attribute in data['attributes']]
df = pd.DataFrame(data['data'], columns=column_names)

# Quick sanity checks on what was loaded.
species = df['class']
print(df.head())            # first five rows
print(len(df))              # number of samples
print(species.nunique())    # number of distinct classes
print(species.unique())     # the class labels themselves

   sepallength  sepalwidth  petallength  petalwidth        class
0          5.1         3.5          1.4         0.2  Iris-setosa
1          4.9         3.0          1.4         0.2  Iris-setosa
2          4.7         3.2          1.3         0.2  Iris-setosa
3          4.6         3.1          1.5         0.2  Iris-setosa
4          5.0         3.6          1.4         0.2  Iris-setosa
150
3
['Iris-setosa' 'Iris-versicolor' 'Iris-virginica']


In [8]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import LabelEncoder

# Encode the species names in 'class' as integer class ids (three classes here,
# not a binary target). The fitted encoder is kept so ids can be mapped back to
# names through label_encoder.classes_ / label_encoder.inverse_transform.
# NOTE: re-running this cell refits the encoder on the already-encoded ids, so
# the name mapping is only available after a fresh top-to-bottom run.
label_encoder = LabelEncoder()
df['class'] = label_encoder.fit_transform(df['class'])
print(df['class'][:5])        # the first five rows are all the same species, so they share one id

X = df.drop(columns=['class'])     # Features: every column except the target
y = df['class'].to_numpy()         # Target: already integer-encoded above, so no second encoder is needed

# Hold out 30% of the samples for evaluation; the fixed seed keeps the split reproducible.
# NOTE(review): no `stratify=` is passed, so class counts in the test set are not balanced.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Fit a 100-tree forest with a fixed seed, then score it on the held-out set.
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


0    0
1    0
2    0
3    0
4    0
Name: class, dtype: int32
Accuracy: 1.0
Confusion Matrix:
 [[19  0  0]
 [ 0 13  0]
 [ 0  0 13]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      1.00      1.00        13
           2       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45

