In [2]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [3]:
# Execute the shared Functions notebook inside this kernel so the names it
# defines become available here.
# NOTE(review): presumably this is where `extract_features` (used further
# down) comes from, since nothing else in this notebook imports it; confirm.
%run ../Functions.ipynb

In [4]:
# Directory structure
# The dataset root can be overridden with the OPEN_DATASET_DIR environment
# variable so the notebook is runnable on machines other than the author's;
# when the variable is unset the original hard-coded location is used.
base_dir = os.environ.get("OPEN_DATASET_DIR", r"D:\Om Saran\ML\adobe\dataset\open")
# One sub-folder of base_dir per class; the folder name doubles as the label.
categories = ['straight', 'not_straight']

In [5]:
# Data storage: `data` gets one array of values per CSV file and `labels`
# gets the category (folder name) that file was found in, at the same index.
data = []
labels = []

# Load data
for category in categories:
    folder_path = os.path.join(base_dir, category)
    # os.listdir returns entries in arbitrary order; sort them so the sample
    # order (and therefore the seeded train/test split performed later in this
    # notebook) is the same on every run and every machine.
    for file_name in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, file_name)
        if file_path.endswith('.csv'):
            # NOTE(review): read_csv treats the first row as a header by
            # default; confirm the CSVs have one, otherwise the first point of
            # every curve is silently consumed as column names.
            points = pd.read_csv(file_path)
            data.append(points.values)  # Store the rows as a numpy array
            labels.append(category)  # Store the label

In [6]:
# Distinct class labels. Sorted because iteration order of a set of strings
# can differ between kernel sessions (string hash randomization), which would
# make this cell's output unstable across re-runs.
unique_labels = sorted(set(labels))

print(unique_labels)

['not_straight', 'straight']


In [7]:
# Accumulate one feature array and one label per curve that yields features.
feature_rows = []
kept_labels = []

for curve, label in zip(data, labels):
    features = extract_features(curve)
    if features.size == 0:
        # Nothing was extracted for this curve; leave it out of the dataset.
        continue
    feature_rows.append(features)
    kept_labels.append(label)

# Materialise the accumulated lists as numpy arrays.
X = np.array(feature_rows)
Y = np.array(kept_labels)

In [8]:
# Keep the sample axis and collapse every remaining axis into a single one.
X = X.reshape(len(X), -1)

In [9]:
from sklearn.model_selection import train_test_split

# Split the data into training and testing sets.
# test_size: proportion of the dataset to include in the test split (30% here)
# random_state: seed used by the random number generator for reproducibility
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=42)

In [10]:
from sklearn.ensemble import RandomForestClassifier

# Hyper-parameters for the forest, kept together so they are easy to spot.
forest_params = {"n_estimators": 100, "random_state": 42}

# Build the Random Forest classifier and fit it on the training split.
model = RandomForestClassifier(**forest_params)
model.fit(X_train, Y_train)

RandomForestClassifier(random_state=42)

In [11]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Predict labels for the held-out test split.
Y_pred = model.predict(X_test)

# Overall fraction of test samples classified correctly.
accuracy = accuracy_score(Y_test, Y_pred)
print(f'Accuracy: {accuracy:.2f}')

# Per-class precision / recall / F1, printed under its heading.
print('Classification Report:', classification_report(Y_test, Y_pred), sep='\n')

# Confusion matrix of true vs. predicted labels, printed under its heading.
print('Confusion Matrix:', confusion_matrix(Y_test, Y_pred), sep='\n')

Accuracy: 1.00
Classification Report:
              precision    recall  f1-score   support

not_straight       1.00      1.00      1.00        10
    straight       1.00      1.00      1.00         9

    accuracy                           1.00        19
   macro avg       1.00      1.00      1.00        19
weighted avg       1.00      1.00      1.00        19

Confusion Matrix:
[[10  0]
 [ 0  9]]


In [12]:
from joblib import dump, load

# Persist the fitted model next to the other saved models. Create the target
# directory first so the dump does not fail on a fresh checkout where
# ../Models does not exist yet.
os.makedirs('../Models', exist_ok=True)
dump(model, '../Models/model_open.joblib')

['model_open.joblib']