In [1]:
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


# Data Gathering

In [2]:
# Rows collected by the image-loading cell: one list per image, holding the
# flattened pixel values followed by the class label.
dataset = []

# Root folder of the training images. Set the GENDER_TRAIN_DIR environment
# variable to run the notebook on another machine; the default keeps the
# original location so existing runs behave exactly as before.
train_dir = os.environ.get(
    'GENDER_TRAIN_DIR',
    'C:/Users/DELL/Downloads/gender/dataset1/train',
)

# One sub-folder per class; the folder name is later used as the label.
paths = [f'{train_dir}/man', f'{train_dir}/woman']

In [3]:
# Start from an empty dataset so that re-running this cell does not append
# every image a second time (``dataset`` is created in an earlier cell and
# would otherwise keep growing across executions).
dataset.clear()

for class_dir in paths:
    # The folder name ('man' / 'woman') doubles as the class label
    folder_name = os.path.basename(class_dir)

    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order, and therefore the seeded train/test split, reproducible.
    for file_name in sorted(os.listdir(class_dir)):
        image_path = os.path.join(class_dir, file_name)

        # Only consider regular files (skip nested directories)
        if not os.path.isfile(image_path):
            continue

        # Load the image as a single-channel grayscale array
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

        # cv2.imread returns None for files it cannot decode; skip those
        if image is None:
            continue

        # Resize to 250x250 so every image yields the same number of features
        resized_image = cv2.resize(image, (250, 250))

        # One row per image: 62,500 pixel values followed by the label
        flattened_image = resized_image.flatten().tolist()
        dataset.append(flattened_image + [folder_name])

In [4]:
# Column layout: one column per pixel of the 250x250 image, then the class label
pixel_columns = [f'pixel_{idx}' for idx in range(1, 250 * 250 + 1)]
df = pd.DataFrame(dataset, columns=pixel_columns + ['label'])

# Target: the folder-name label; features: every pixel column scaled to [0, 1]
Y = df['label']
X = df.drop(columns='label') / 255

# Map the string labels to integer codes
label_encoder = LabelEncoder()
Y_encoded = label_encoder.fit_transform(Y)
y_series = pd.Series(Y_encoded, name='Target')

# Features and encoded target side by side in a single frame
df_encoded = pd.concat([X, y_series], axis=1)

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
split_parts = train_test_split(X, y_series, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [5]:
# Inspect the assembled DataFrame: raw (unscaled) pixel columns plus the string label
df

Unnamed: 0,pixel_1,pixel_2,pixel_3,pixel_4,pixel_5,pixel_6,pixel_7,pixel_8,pixel_9,pixel_10,...,pixel_62492,pixel_62493,pixel_62494,pixel_62495,pixel_62496,pixel_62497,pixel_62498,pixel_62499,pixel_62500,label
0,63,64,65,66,68,70,71,73,74,75,...,19,19,19,19,19,19,19,20,20,man
1,247,247,248,248,248,248,248,248,248,248,...,32,32,31,30,30,31,33,34,36,man
2,153,153,160,169,178,177,168,160,154,152,...,21,21,20,20,20,20,21,21,21,man
3,22,20,18,16,14,11,15,20,27,31,...,33,35,35,36,37,37,37,37,38,man
4,82,82,81,80,78,77,73,70,67,63,...,157,156,156,156,171,188,205,216,216,man
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1595,15,16,17,19,22,24,24,24,23,21,...,182,180,178,177,176,173,171,169,167,woman
1596,27,29,32,33,33,30,28,29,31,35,...,24,19,21,22,23,29,35,35,35,woman
1597,70,39,27,47,60,59,48,51,77,92,...,99,99,99,98,97,96,95,94,94,woman
1598,46,44,34,25,17,22,23,17,20,40,...,5,7,8,11,13,13,4,3,6,woman


In [6]:
from sklearn.linear_model import LogisticRegression

# With the default iteration budget the solver stops before converging on this
# 62,500-feature problem (the fit reports "TOTAL NO. of ITERATIONS REACHED
# LIMIT"), so give it more room. The pixel features are already scaled to [0, 1].
logreg = LogisticRegression(max_iter=1000)


In [None]:
# Train the logistic-regression baseline on the training split
logreg.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [None]:
# Predict encoded class labels for the held-out test split
y_pred = logreg.predict(X_test)

In [None]:
# Show the logistic-regression predictions for the test split
y_pred

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Share of test images whose predicted label matches the true label
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
acc

In [None]:
# Cross-tabulate true labels against the logistic-regression predictions
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
cm

In [None]:
import joblib

# Persist the fitted logistic-regression model next to the notebook
joblib.dump(value=logreg, filename='clmodel.pkl')


In [None]:
import cv2
import os
import numpy as np
# Load the model from the saved file.
# NOTE: joblib.load unpickles arbitrary objects -- only load files you created yourself.
model = joblib.load('clmodel.pkl')


def preprocess_image(image_path):
    """Turn an image file into the feature layout used for training.

    The image is read as grayscale, resized to 250x250, flattened into a
    single row and scaled to [0, 1], mirroring the training data preparation.

    Args:
        image_path: Path of the image file to load.

    Returns:
        A float array of shape (1, 62500), or None when OpenCV cannot decode
        the file (cv2.imread returns None in that case).
    """
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # Not a readable image; let the caller decide how to report it
        # instead of crashing inside cv2.resize.
        return None
    resized_image = cv2.resize(image, (250, 250))
    flattened_image = resized_image.flatten().reshape(1, -1)
    return flattened_image / 255.0


def make_prediction(image_path):
    """Predict the encoded class label for a single image.

    Args:
        image_path: Path of the image file to classify.

    Returns:
        The array returned by ``model.predict`` (one element), or None when
        the image could not be read.
    """
    preprocessed_image = preprocess_image(image_path)
    if preprocessed_image is None:
        return None

    # The model was fitted on a DataFrame with named pixel columns; pass the
    # same column names so scikit-learn does not warn about missing feature names.
    feature_names = getattr(model, 'feature_names_in_', None)
    if feature_names is not None:
        preprocessed_image = pd.DataFrame(preprocessed_image, columns=feature_names)

    return model.predict(preprocessed_image)


test_folder = "C:/Users/DELL/Downloads/gender/dataset1/test/man"  # Replace with the path to your test folder

# LabelEncoder assigns integer codes in sorted order of the class names,
# so 'man' -> 0 and 'woman' -> 1.
class_names = {0: "MAN", 1: "WOMAN"}

for file_name in os.listdir(test_folder):
    image_path = os.path.join(test_folder, file_name)

    # Only consider regular files
    if not os.path.isfile(image_path):
        continue

    predicted_class = make_prediction(image_path)
    if predicted_class is None:
        print(f"Image: {file_name} | Skipped (could not be read as an image)")
        continue

    # model.predict returns a one-element array; compare its scalar value
    predicted_label = int(predicted_class[0])
    if predicted_label in class_names:
        print(f"Image: {file_name} | Predicted Class: {class_names[predicted_label]}")

In [None]:
from sklearn.linear_model import SGDClassifier
from sklearn.pipeline import make_pipeline

# Logistic regression trained with stochastic gradient descent.
# SGD shuffles the training data, so fix random_state to make the fitted
# model (and every metric derived from it) reproducible between runs.
clf = SGDClassifier(loss='log_loss', max_iter=1000, tol=1e-3, random_state=42)
clf.fit(X_train, y_train)


In [None]:
# Predict test-split labels with the fitted SGD classifier
sgdpred = clf.predict(X_test)

In [None]:
# Per-sample confidence scores from the linear decision function on the test split
clf.decision_function(X_test)

In [None]:
# Convert the coefficient matrix to dense array format.
# NOTE(review): clf is never sparsified in the visible cells, so this looks like a no-op -- confirm.
clf.densify()

In [None]:
# Refit the SGD classifier (the explicit None arguments were just the defaults).
clf.fit(X_train, y_train)

# Refresh the predictions so the accuracy and confusion matrix computed from
# ``sgdpred`` describe this refitted model rather than the previous fit.
sgdpred = clf.predict(X_test)

# Keep the estimator as the cell's displayed result, as before
clf

In [None]:
# Log of the predicted class probabilities for each test sample
clf.predict_log_proba(X_test)

In [None]:
# Share of test images the SGD classifier labelled correctly
acc = accuracy_score(y_true=y_test, y_pred=sgdpred)
acc

In [None]:
# Cross-tabulate true labels against the SGD classifier predictions
cm = confusion_matrix(y_true=y_test, y_pred=sgdpred)
cm

In [None]:
from sklearn.svm import SVC

# Support-vector classifier with a sigmoid kernel and a fixed seed
svcClf = SVC(
    C=3,
    kernel="sigmoid",
    gamma='auto',
    random_state=42,
)

In [None]:
# Fit the sigmoid-kernel SVC on the training split
svcClf.fit(X_train, y_train)

In [None]:
# Predict test-split labels with the fitted SVC
spred = svcClf.predict(X_test)

In [None]:
# Share of test images the SVC labelled correctly
acc = accuracy_score(y_true=y_test, y_pred=spred)
acc

In [None]:
# Cross-tabulate true labels against the SVC predictions
cm = confusion_matrix(y_true=y_test, y_pred=spred)
cm

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Shallow random forest: 200 trees of depth at most 3, seeded for repeatability
rclf = RandomForestClassifier(
    n_estimators=200,
    max_depth=3,
    random_state=0,
)
rclf.fit(X_train, y_train)

In [None]:
# Predict test-split labels with the fitted random forest and display them
rpred = rclf.predict(X_test)
rpred

In [None]:
# Share of test images the random forest labelled correctly
acc = accuracy_score(y_true=y_test, y_pred=rpred)
acc

In [None]:
# Cross-tabulate true labels against the random-forest predictions
cm = confusion_matrix(y_true=y_test, y_pred=rpred)
cm