In [None]:
def segment_image(image):
    """Keep only the green-ish pixels of a BGR image, expressed in HSV.

    The image is converted to HSV, every pixel that falls outside the
    hue/saturation/value window below is zeroed in all three channels, and the
    result is min-max rescaled to the range 0..200.

    Args:
        image: BGR image array as accepted by ``cv2.cvtColor``.

    Returns:
        The masked image with channels stacked in H, S, V order, after
        ``cv2.normalize`` with ``NORM_MINMAX``, ``alpha=0`` and ``beta=200``.
    """
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    hue, saturation, value = hsv[:, :, 0], hsv[:, :, 1], hsv[:, :, 2]

    # A pixel survives only when all three channels are inside the window.
    keep = (
        (hue >= 25) & (hue <= 75)
        & (saturation >= 40) & (saturation <= 255)
        & (value >= 50) & (value <= 255)
    )

    # Multiplying by the boolean mask zeroes the rejected pixels per channel.
    masked_hsv = np.dstack((hue * keep, saturation * keep, value * keep))
    return cv2.normalize(masked_hsv, None, alpha=0, beta=200, norm_type=cv2.NORM_MINMAX)

In [None]:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
import os
import cv2
from glob import glob
# Load every training image, segment it, resize it to 224x224 and remember the
# name of the class folder it came from.
train_root = '/kaggle/input/plant-seedlings-classification/train'
images = []
labels = []
# Only the immediate sub-folders of the training directory are class folders.
# Sorting makes the sample order (and therefore the later seeded split)
# repeatable, because neither os.listdir nor glob guarantees an ordering.
for folder in sorted(os.listdir(train_root)):
    path = os.path.join(train_root, folder)
    if not os.path.isdir(path):
        continue
    for image_path in sorted(glob(os.path.join(path, "*.png"))):
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread reports an unreadable file by returning None; fail
            # here with the offending path instead of deep inside OpenCV.
            raise ValueError("could not read image: {}".format(image_path))
        image = segment_image(image)
        image = cv2.resize(image, (224,224), interpolation = cv2.INTER_AREA)
        images.append(image)
        labels.append(folder)


In [None]:
from keras.utils import np_utils
from sklearn import preprocessing
# Turn the class-folder names into integer ids, then into one-hot rows.
labels = pd.DataFrame(labels)
le = preprocessing.LabelEncoder()
encodeTrainLabels = le.fit_transform(labels[0])
classes = np_utils.to_categorical(encodeTrainLabels)
images = np.asarray(images)
# to_categorical already yields one row per sample and one column per class,
# so the shape is checked against the data instead of being forced to a
# hard-coded (4750, 12).
classes = np.asarray(classes).astype('int')
assert classes.shape == (len(images), len(le.classes_)), (
    "one-hot labels have shape {}, expected {}".format(
        classes.shape, (len(images), len(le.classes_))))

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for evaluation. The one-hot label matrix is used
# as the stratification target and the seed is fixed.
split_options = {"test_size": 0.2, "random_state": 2022, "stratify": classes}
trainX, testX, trainY, testY = train_test_split(images, classes, **split_options)

In [None]:
from keras.models import load_model
def load_models():
    """Load the four saved base models.

    Returns:
        list: the loaded models, in the same order as the paths listed below.
    """
    model_paths = (
        "../input/resnet50-model/resmodel",
        "../input/model2/incep-resnetmodel/incep_resnet",
        "../input/model2/model.h5",
        "../input/model2/Resnet50_accuracy96.2/resmodel",
    )
    return [load_model(model_path) for model_path in model_paths]


# Load the base models once; later cells reuse this list.
models = load_models()

In [None]:
# Stacking: every base model predicts a class for each sample; those predicted
# classes become the input features of a second-level model that is trained
# against the true labels.
def predict_models(models_array,trainX):
    """Return each base model's predicted class index for every sample.

    Args:
        models_array: sequence holding at least four objects with a
            ``predict`` method. Positions 0, 1, 2 and 3 feed the columns
            ``resnet``, ``incep_resnet``, ``effinet`` and ``resnet2``.
        trainX: batch handed unchanged to every model's ``predict``.

    Returns:
        pandas.DataFrame with the columns ``resnet``, ``resnet2``,
        ``incep_resnet`` and ``effinet`` (in that order), one row per sample,
        each cell being the argmax over axis 1 of that model's output.
    """
    progress_messages = (
        "predicting train 1",
        "predicting train 2",
        "predicting train 3",
        "predicting train 4",
    )
    # Run the models in list order, announcing each one before it predicts.
    hard_votes = []
    for position, message in enumerate(progress_messages):
        print(message)
        scores = models_array[position].predict((trainX))
        hard_votes.append(np.argmax(scores, axis=1))

    # Column order differs from model order: the second ResNet comes second.
    column_sources = (("resnet", 0), ("resnet2", 3), ("incep_resnet", 1), ("effinet", 2))
    data_frame = pd.DataFrame()
    for column, position in column_sources:
        data_frame[column] = hard_votes[position]
    return data_frame


In [None]:
# Build the second-level tables: the base-model votes plus the true class index
# (recovered from the one-hot rows), then save both tables as CSV.
train_data = predict_models(models, trainX).assign(true_label=np.argmax(trainY, axis=1))
test_data = predict_models(models, testX).assign(true_label=np.argmax(testY, axis=1))
train_data.to_csv("train_rf.csv", index=False)
test_data.to_csv("test_rf.csv", index=False)

In [None]:
# Prepare the stacker's training inputs: every vote column and the true label
# become categorical, then both the votes and the label are one-hot encoded.
train_data = train_data.astype({
    "incep_resnet": "category",
    "resnet": "category",
    "true_label": "category",
    "effinet": "category",
    "resnet2": "category",
})
train = pd.get_dummies(train_data[["resnet","incep_resnet","effinet","resnet2"]])
labels = pd.get_dummies(train_data.true_label)

In [None]:
# Prepare the held-out features the same way as the training features.
test_data.incep_resnet = test_data.incep_resnet.astype('category')
test_data.resnet = test_data.resnet.astype('category')
test_data.true_label = test_data.true_label.astype('category')
test_data.effinet = test_data.effinet.astype('category')
test_data.resnet2 = test_data.resnet2.astype('category')
test = test_data[["resnet","incep_resnet","effinet","resnet2"]]
# get_dummies only creates columns for the classes that occur in *this* frame,
# so a class that some model never predicted here would change the column
# layout. Re-index onto the training layout (missing columns become 0) so the
# forest sees the same features, in the same order, as during fitting.
test = pd.get_dummies(test).reindex(columns=train.columns, fill_value=0)

In [None]:
from sklearn.ensemble import RandomForestClassifier
# from sklearn.datasets import make_classification
# clf = RandomForestClassifier(n_estimators = 300,random_state=42,bootstrap=True,max_features="auto")
# clf.fit(train, labels)


from sklearn.model_selection import RandomizedSearchCV
# Candidate hyper-parameter values for the random-forest search.
# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 100, stop = 2000, num = 10)]
# Number of features to consider at every split.
# 'auto' is no longer accepted by recent scikit-learn releases and, for a
# classifier, was the same as 'sqrt' (so the old list held a duplicate);
# 'log2' is offered as the second real alternative.
max_features = ['sqrt', 'log2']
# Maximum number of levels in tree (None = grow until the leaves are pure)
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4]
# Method of selecting samples for training each tree
bootstrap = [True, False]
# Create the random grid
random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}

In [None]:
# Use the random grid to search for best hyperparameters
# First create the base model to tune
from sklearn.model_selection import GridSearchCV
# Seed the forest so that repeated runs build the same trees.
rf = RandomForestClassifier(random_state = 2022)
# Randomised search over random_grid, scored with 10-fold cross validation.
# n_iter is left at its default (10 sampled combinations per the scikit-learn
# docs); n_jobs=-1 uses all available cores, and random_state makes the
# sampled combinations repeatable.
rf_random = RandomizedSearchCV (rf, random_grid, cv = 10, n_jobs = -1, random_state = 2022)
# Fit the random search model
rf_random.fit(train,labels)

In [None]:
# Display the hyper-parameter combination selected by the search.
rf_random.best_params_

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, roc_auc_score, roc_curve, f1_score
# Score the tuned forest on its own training features and on the held-out ones.
# The forest was fit on one-hot labels, so argmax over axis 1 turns each
# predicted row back into a class index.
best_forest = rf_random.best_estimator_
train_predictions = np.argmax(best_forest.predict(train), axis=1)
label = np.argmax(labels.to_numpy(), axis=1)
print(accuracy_score(train_data.true_label, train_predictions))
test_pred = np.argmax(best_forest.predict(test), axis=1)
print(accuracy_score(test_data.true_label, test_pred))

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
import os
import cv2
from glob import glob
# Load the competition test images with the same segmentation and resizing as
# the training images, remembering each file name for the submission.
images_test = []
testId = []
# glob returns files in arbitrary order; sort so every run sees the same order.
for image_path in sorted(glob(os.path.join("../input/plant-seedlings-classification/test", "*.png"))):
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if image is None:
        # cv2.imread reports an unreadable file by returning None; fail here
        # with the offending path rather than submit a file with missing rows.
        raise ValueError("could not read image: {}".format(image_path))
    image = segment_image(image)
    image = cv2.resize(image, (224,224), interpolation = cv2.INTER_AREA)
    images_test.append(image)
    # basename is the portable way to take the last path component.
    testId.append(os.path.basename(image_path))

In [None]:
# Collect the base-model votes for the competition images and mark every vote
# column as categorical.
images_test = np.asarray(images_test)
data_frame = predict_models(models, images_test)
data_frame = data_frame.astype(
    {vote_column: "category" for vote_column in ("incep_resnet", "effinet", "resnet", "resnet2")}
)

In [None]:
# One-hot encode the votes and put them on the training feature layout.
# predict_models emits its columns as resnet, resnet2, incep_resnet, effinet,
# whereas the forest was fit on resnet, incep_resnet, effinet, resnet2, so the
# raw dummies would reach the forest in the wrong order. Re-indexing onto
# train.columns restores the order and fills any class that never occurs in
# these votes with 0.
data_frame_dummy = pd.get_dummies(data_frame).reindex(columns=train.columns, fill_value=0)
pred = rf_random.predict(data_frame_dummy)
# The forest predicts one-hot style rows; argmax recovers the class index,
# which the label encoder maps back to the species name.
pred_number = np.argmax(pred, axis=1)
predStr = le.classes_[pred_number]

res = pd.DataFrame({'file': testId, 'species': predStr})
res.to_csv("submission.csv", index=False)