In [1]:
from src.constants import PROJECT_ROOT_PATH

In [2]:
import os
import shutil
from tqdm import tqdm

def find_images(folder_path, extensions=('.jpg', '.png', '.jpeg', '.gif', '.bmp')):
    """Recursively collect the paths of image files below ``folder_path``.

    Args:
        folder_path: Directory to search; every sub-directory is walked too.
        extensions: File-name suffixes to accept. Matching is case-insensitive
            on both the file name and the suffix. A single string is treated
            as one suffix rather than being iterated character by character.

    Returns:
        list: Paths built with ``os.path.join(root, file_name)`` for every
        matching file. Directories are visited in sorted order and files are
        sorted by name within each directory, so the result is deterministic.
        The list is empty when nothing matches or the folder does not exist
        (``os.walk`` ignores listing errors by default).
    """
    if isinstance(extensions, str):
        extensions = (extensions,)
    # str.endswith accepts a tuple, so one call checks every suffix.
    suffixes = tuple(ext.lower() for ext in extensions)

    image_paths = []
    for root, dirs, files in os.walk(folder_path):
        # Sorting in place makes os.walk descend in a stable order.
        dirs.sort()
        image_paths.extend(
            os.path.join(root, name)
            for name in sorted(files)
            if name.lower().endswith(suffixes)
        )
    return image_paths



In [3]:
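# NOTE: one-off data-preparation step, kept disabled for reference. It copies every
# image found under the sliced-cards folder into data/training_data using sequential
# zero-padded names; each copy gets a ".jpg" suffix whatever its source extension.
# folder_path = "data/sliced_cards/11-28-2023"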
# images = find_images(folder_path)

# new_folder_path = "data/training_data"
# os.makedirs(new_folder_path, exist_ok=True)

# for i, image_path in enumerate(images):
#     new_image_path = os.path.join(new_folder_path, f"{i:04d}.jpg")
#     shutil.copy(image_path, new_image_path)

In [4]:
# Training images are organised one folder per class:
# "empty" boxes are class 0 and "checked" boxes are class 1.
class_0_path = PROJECT_ROOT_PATH / "data/checkbox_model_training_data/empty"
class_1_path = PROJECT_ROOT_PATH / "data/checkbox_model_training_data/checked"

# For each class, gather the image paths and build a label list of equal length.
class_0_images = find_images(class_0_path)
class_0_classifications = [0 for _ in class_0_images]

class_1_images = find_images(class_1_path)
class_1_classifications = [1 for _ in class_1_images]

In [5]:
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

# Load and preprocess images
def load_images(image_paths, size=(25, 25)):
    """Load images as flattened grayscale feature vectors.

    Args:
        image_paths: Iterable of image file paths that PIL can open.
        size: ``(width, height)`` every image is resized to before
            flattening. The default 25x25 yields 625 values per image.

    Returns:
        list: One 1-D numpy array per input path, in input order.
    """
    images = []
    for path in image_paths:
        # The context manager closes the underlying file as soon as the pixels
        # have been converted, so handles do not accumulate over a large
        # dataset (multi-frame formats such as GIF otherwise stay open).
        with Image.open(path) as img:
            gray = img.convert('L')  # Convert to grayscale
        resized = gray.resize(size)
        images.append(np.array(resized).flatten())
    return images

# Paths to the images and their corresponding labels, class 0 first, then class 1
image_paths = class_0_images + class_1_images
labels = class_0_classifications + class_1_classifications

images = load_images(image_paths)

# Split dataset: 30% is held out for testing. A fixed seed makes the split,
# and therefore the accuracy reported below, repeatable across runs.
RANDOM_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    images, labels, test_size=0.3, random_state=RANDOM_SEED
)

# Initialize and train a support-vector classifier with default settings
classifier = SVC()
classifier.fit(X_train, y_train)

# Evaluate classifier: mean accuracy on the held-out split
accuracy = classifier.score(X_test, y_test)
print(f"Accuracy: {accuracy}")


Accuracy: 0.9901960784313726


In [6]:
import pickle

model_path = PROJECT_ROOT_PATH / 'static/checkbox_model_3.pkl'

# Ask before replacing a model file that is already on disk; any answer other
# than "y"/"Y" aborts the cell.
# NOTE: input() blocks, so an unattended run stalls here while the file exists.
if model_path.exists():
    proceed = input(f"Model {model_path} already exists. Do you want to overwrite it? (y/n): ")
    confirmed = proceed.lower() == 'y'
    if not confirmed:
        print("Operation cancelled by the user.")
        raise SystemExit


# Persist the trained classifier to disk.
with open(model_path, mode='wb') as model_file:
    pickle.dump(classifier, model_file)

# Read it straight back from the same file.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open(model_path, mode='rb') as model_file:
    loaded_classifier = pickle.load(model_file)


In [8]:
# Run the classifier over a folder of images and sort copies of them into
# pred_0/ and pred_1/ under the project root, so the predictions can be
# inspected by eye.
final_test_folder = PROJECT_ROOT_PATH/"data/06_Segments/included_in_rent/cook_fuel"
final_test_paths = find_images(final_test_folder)
final_test_images = load_images(final_test_paths)

pred_0_dir = PROJECT_ROOT_PATH/'pred_0/'
pred_1_dir = PROJECT_ROOT_PATH/'pred_1/'
for output_dir in (pred_0_dir, pred_1_dir):
    os.makedirs(output_dir, exist_ok=True)

# Predict once; the paths and predictions line up by position.
predictions = classifier.predict(final_test_images)
for image_path, pred in zip(final_test_paths, predictions):
    # Anything not predicted as class 0 goes to pred_1/.
    destination = pred_0_dir if pred == 0 else pred_1_dir
    # shutil.copy into a directory keeps only the file's base name, so images
    # sharing a name in different sub-folders overwrite each other.
    shutil.copy(image_path, destination)
