In [7]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [8]:
import os
from sklearn.model_selection import train_test_split
from keras.preprocessing import image
import numpy as np

def load_images_from_folder(folder):
    images = []
    labels = []
    print("Loading images from folder:", folder)
    for label in os.listdir(folder):
        label_folder = os.path.join(folder, label)
        if os.path.isdir(label_folder):  # Check if it's a directory
            for filename in os.listdir(label_folder):
                img = image.load_img(os.path.join(label_folder, filename), target_size=(224, 224))
                img = image.img_to_array(img)
                images.append(img)
                labels.append(label)
        else:
            # If it's not a directory, assume all images are directly in the unlabeled folder
            img = image.load_img(os.path.join(folder, label), target_size=(224, 224))
            img = image.img_to_array(img)
            images.append(img)
            labels.append(label)

    return np.array(images), np.array(labels)

# Load labeled data
labeled_folder = '/content/drive/My Drive/dataset4/labeled'
X_labeled, y_labeled = load_images_from_folder(labeled_folder)

# Load unlabeled data
unlabeled_folder = '/content/drive/My Drive/dataset4/unlabeled'
X_unlabeled, _ = load_images_from_folder(unlabeled_folder)

# Split labeled data into training and testing sets
X_train_labeled, X_test, y_train_labeled, y_test = train_test_split(X_labeled, y_labeled, test_size=0.2, random_state=42)
print(X_unlabeled)

Loading images from folder: /content/drive/My Drive/dataset4/labeled
Loading images from folder: /content/drive/My Drive/dataset4/unlabeled
[[[[119. 154. 208.]
   [118. 155. 208.]
   [115. 155. 207.]
   ...
   [200. 191. 218.]
   [193. 186. 219.]
   [176. 170. 206.]]

  [[120. 155. 209.]
   [119. 156. 209.]
   [116. 156. 208.]
   ...
   [169. 180. 208.]
   [159. 172. 206.]
   [151. 164. 199.]]

  [[121. 157. 209.]
   [120. 157. 209.]
   [117. 157. 208.]
   ...
   [138. 167. 201.]
   [131. 161. 199.]
   [133. 162. 202.]]

  ...

  [[105.  89.  76.]
   [118. 100.  78.]
   [100.  80.  56.]
   ...
   [ 72.  59.  43.]
   [ 85.  72.  56.]
   [ 96.  83.  67.]]

  [[ 96.  78.  66.]
   [110.  93.  73.]
   [ 76.  58.  36.]
   ...
   [ 80.  66.  53.]
   [ 85.  71.  60.]
   [ 65.  51.  40.]]

  [[110.  92.  80.]
   [ 99.  84.  65.]
   [104.  86.  66.]
   ...
   [ 59.  45.  36.]
   [ 62.  48.  39.]
   [ 62.  48.  39.]]]


 [[[140. 166. 215.]
   [140. 166. 215.]
   [140. 166. 215.]
   ...
   [175. 1

In [9]:
from sklearn.tree import DecisionTreeClassifier

# Initialize Decision Tree classifier
tree_model = DecisionTreeClassifier()

# Flatten image arrays
X_train_labeled_flatten = X_train_labeled.reshape(X_train_labeled.shape[0], -1)

# Train initial model on labeled data
tree_model.fit(X_train_labeled_flatten, y_train_labeled)


In [11]:
# Predict labels for unlabeled data
pseudo_labels = tree_model.predict(X_unlabeled.reshape(X_unlabeled.shape[0], -1))

# Select most confident predictions
confidence_threshold = 0.9
confidence_scores = tree_model.predict_proba(X_unlabeled.reshape(X_unlabeled.shape[0], -1)).max(axis=1)
confident_predictions = confidence_scores > confidence_threshold

X_confident = X_unlabeled[confident_predictions]
pseudo_labels_confident = pseudo_labels[confident_predictions]

# Add confident predictions to labeled data
X_train_labeled = np.concatenate([X_train_labeled, X_confident])
y_train_labeled = np.concatenate([y_train_labeled, pseudo_labels_confident])


In [12]:
# Flatten expanded labeled data
X_train_labeled_flatten = X_train_labeled.reshape(X_train_labeled.shape[0], -1)

# Retrain model with expanded labeled data
tree_model.fit(X_train_labeled_flatten, y_train_labeled)

# Predict labels for test set
y_pred = tree_model.predict(X_test.reshape(X_test.shape[0], -1))

# Calculate accuracy
accuracy = np.mean(y_pred == y_test)
print("Accuracy of the semi-supervised model:", accuracy)

Accuracy of the semi-supervised model: 0.18


In [1]:
from sklearn.model_selection import GridSearchCV

# Define hyperparameters to tune
param_grid = {
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Initialize Decision Tree classifier
tree_model = DecisionTreeClassifier()

# Perform grid search with cross-validation
grid_search = GridSearchCV(estimator=tree_model, param_grid=param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train_labeled_flatten, y_train_labeled)

# Get the best hyperparameters
best_params = grid_search.best_params_
print("Best hyperparameters:", best_params)

# Retrain the model with the best hyperparameters
tree_model_best = DecisionTreeClassifier(**best_params)
tree_model_best.fit(X_train_labeled_flatten, y_train_labeled)


NameError: name 'DecisionTreeClassifier' is not defined