In [None]:
# Dataset link: https://www.kaggle.com/datasets/techsash/waste-classification-data
# GitHub link: https://github.com/zerobear8530032/ML_Internship

In [13]:
import pandas as pd
import matplotlib.pyplot as plt
import os
import numpy as np
import cv2
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix 


In [14]:

# creating data set 
# def get_all_file_names(path:str) ->list:
#     list = []
#     for f in os.listdir(path):
#         list.append(f"{path}/{f}")
#     return list;


In [15]:
# creating train data set
# train_o=get_all_file_names("DATASET/TRAIN/O")
# train_r=get_all_file_names("DATASET/TRAIN/R")
# train=train_o+train_r

In [16]:
# creating test data set 
# test_o=get_all_file_names("DATASET/TEST/O")
# test_r=get_all_file_names("DATASET/TEST/R")
# test=test_o+test_r

In [17]:
# creating the df from the data :
# traindf=pd.DataFrame({"path":train})

In [18]:
# helper that derives the label ('O' or 'R') from an image path
# def get_label(s:str)-> str :
#     path = s.split("/")
#     if 'O' in path[len(path)-1]:
#         return 'O';
#     return 'R'

In [19]:
# find the labels
# traindf["label"]=traindf["path"].apply(get_label)

In [20]:
# convert to csv :
# traindf.to_csv("DATASET/waste_management.csv",index=False)

In [21]:
# Load the CSV index of image paths and their labels.
df = pd.read_csv("DATASET/waste_management.csv")

In [22]:
# The data set holds one row per image: the image path and its label, where
#   O : organic
#   R : recyclable
df

Unnamed: 0,path,label
0,DATASET/TRAIN/O/O_1.jpg,O
1,DATASET/TRAIN/O/O_10.jpg,O
2,DATASET/TRAIN/O/O_100.jpg,O
3,DATASET/TRAIN/O/O_1000.jpg,O
4,DATASET/TRAIN/O/O_10000.jpg,O
...,...,...
22559,DATASET/TRAIN/R/R_9995.jpg,R
22560,DATASET/TRAIN/R/R_9996.jpg,R
22561,DATASET/TRAIN/R/R_9997.jpg,R
22562,DATASET/TRAIN/R/R_9998.jpg,R


In [23]:
# (width, height) every image is resized to before it is flattened.
# Adjust based on your needs.
IMAGE_SIZE = (64, 64)


def load_and_preprocess_images(df, image_folder):
    """Load the images listed in ``df`` and flatten each into a feature vector.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a ``path`` column (image file path, relative to
        ``image_folder``) and a ``label`` column.
    image_folder : str
        Directory the ``path`` values are relative to. Pass ``""`` when the
        paths can be used as they are.

    Returns
    -------
    tuple of numpy.ndarray
        ``(images, labels)``: ``images`` holds one flattened, resized image
        per readable file and ``labels`` holds the matching ``label`` values
        in the same order.

    Notes
    -----
    Files that ``cv2.imread`` cannot read are left out of both arrays; the
    number of skipped files is printed so the loss is not silent.
    """
    images = []
    labels = []
    skipped = 0
    # Walking the two columns directly avoids building a Series per row,
    # which is what makes ``iterrows`` slow on large frames.
    for rel_path, label in zip(df['path'], df['label']):
        # os.path.join inserts the separator when image_folder lacks one and
        # returns rel_path unchanged when image_folder is "".
        img_path = os.path.join(image_folder, rel_path)
        img = cv2.imread(img_path)
        if img is None:
            # imread signals an unreadable/missing file by returning None.
            skipped += 1
            continue
        images.append(cv2.resize(img, IMAGE_SIZE).flatten())
        labels.append(label)
    if skipped:
        print(f"Skipped {skipped} image(s) that could not be read")
    return np.array(images), np.array(labels)



In [24]:
# Read every listed image from disk and build the feature matrix `x`
# and the label vector `y`.
# WARNING: this cell takes a long time to run.
x, y = load_and_preprocess_images(df, image_folder="")

In [47]:
# Encode the class labels as integers; `lc` is kept so predictions can be
# mapped back to the original labels later.
lc = LabelEncoder()
y_encode = lc.fit_transform(y)

In [45]:
# Review the data frame.
# NOTE(review): the execution counts around this cell are out of order
# (In [47] appears before In [45]), so the saved output may not match a fresh
# run — confirm with Restart Kernel & Run All.
df

Unnamed: 0,path,label
0,DATASET/TRAIN/O/O_1.jpg,0
1,DATASET/TRAIN/O/O_10.jpg,0
2,DATASET/TRAIN/O/O_100.jpg,0
3,DATASET/TRAIN/O/O_1000.jpg,0
4,DATASET/TRAIN/O/O_10000.jpg,0
...,...,...
22559,DATASET/TRAIN/R/R_9995.jpg,1
22560,DATASET/TRAIN/R/R_9996.jpg,1
22561,DATASET/TRAIN/R/R_9997.jpg,1
22562,DATASET/TRAIN/R/R_9998.jpg,1


In [48]:
# Hold out 20% of the samples for evaluation; the fixed seed keeps the
# split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y_encode,
    test_size=0.2,
    random_state=42,
)


In [49]:
# Random forest with 100 trees; the seed is fixed for repeatable results.
clf = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)



In [50]:
# Fit the classifier on the training split.
clf.fit(X=x_train, y=y_train)

In [51]:
# Predict labels for the held-out split ...
y_pred = clf.predict(x_test)

# ... and report the fraction that matches the true labels.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy}')


Accuracy: 0.8092178152005318


In [52]:
def predict(img_path: str, model, image_size=(64, 64)):
    """Classify a single image file with a fitted model.

    The image is read with OpenCV, resized to ``image_size``, flattened into
    a single row and passed to ``model.predict``.

    Parameters
    ----------
    img_path : str
        Path of the image file to classify.
    model
        Any object exposing ``predict(rows)``.
    image_size : tuple of int, optional
        ``(width, height)`` the image is resized to. It must match the size
        the model was trained with; defaults to ``(64, 64)``.

    Returns
    -------
    object or None
        Whatever ``model.predict`` returns for the one-row input, or ``None``
        if resizing or predicting failed (the error is printed).

    Raises
    ------
    FileNotFoundError
        If ``img_path`` does not exist.
    ValueError
        If the file exists but OpenCV cannot decode it as an image.
    """
    if not os.path.exists(img_path):
        raise FileNotFoundError(f"File not found: {img_path}")

    img = cv2.imread(img_path)
    if img is None:
        # imread returns None instead of raising for undecodable files.
        raise ValueError(f"Image cannot be converted to a usable format from: {img_path}")

    try:
        img = cv2.resize(img, image_size)
        # One sample with all pixel values as features.
        img_flat = img.flatten().reshape(1, -1)
        return model.predict(img_flat)
    except Exception as e:
        # Deliberately best-effort: report the problem and let the caller
        # handle the None result.
        print(f"An error occurred during prediction: {e}")
        return None



In [64]:
# Sanity-check the trained model on a single image.
# NOTE(review): this image is listed in the data frame the model was built
# from, so this only shows the pipeline runs end to end — it is not evidence
# of accuracy on unseen data.
pred = predict("DATASET/TRAIN/R/R_9995.jpg", clf)
if pred is None:
    # predict() already printed the underlying error.
    print("the prediction is not one of the labels")
else:
    try:
        label = lc.inverse_transform(pred)
        print(f"Predicted label: {label[0]}")
    except ValueError:
        # inverse_transform raises ValueError for values the encoder never
        # saw; any other exception is a real bug and should surface.
        print("the prediction is not one of the labels")

Predicted label: 1


In [59]:
# saving model :
def save_model(filename, model):
    """Pickle ``model`` to ``filename`` and print a confirmation.

    Parameters
    ----------
    filename : str
        Destination file path. Missing parent directories are created.
    model
        Any picklable object (here: the trained classifier).
    """
    # open() does not create directories, so make sure the target folder
    # exists; dirname is "" for a bare file name, which needs no directory.
    parent_dir = os.path.dirname(filename)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(filename, 'wb') as file:
        pickle.dump(model, file)
    print(f"Model saved as {filename}")


model/waste_management_model.pkl


In [60]:
# Persist the trained classifier to disk.
save_model(filename="model/waste_management_model.pkl", model=clf)

Model saved as model/waste_management_model.pkl


In [65]:
# Reload the pickled classifier from disk.
# NOTE: pickle.load can execute arbitrary code, so only load files you
# created yourself or otherwise trust.
filename = 'model/waste_management_model.pkl'
loaded_model = None
with open(filename, 'rb') as file:
    loaded_model = pickle.load(file)

print("Model loaded successfully.")


Model loaded successfully.


In [66]:
# Run the same single-image prediction with the reloaded model to check that
# the model was saved and restored correctly.
pred = predict("DATASET/TRAIN/R/R_9995.jpg", loaded_model)
if pred is None:
    # predict() already printed the underlying error.
    print("the prediction is not one of the labels")
else:
    try:
        label = lc.inverse_transform(pred)
        print(f"Predicted label: {label[0]}")
    except ValueError:
        # inverse_transform raises ValueError for values the encoder never
        # saw; any other exception is a real bug and should surface.
        print("the prediction is not one of the labels")

Predicted label: 1
