# Import libraries

In [None]:
import os
import cv2
import math
import random
import pandas as pd 
import numpy as np
from tqdm import tqdm
from sklearn import metrics
from sklearn.model_selection import train_test_split
tqdm.pandas()
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots

# Get data path

In [None]:
# Number of training images loaded into memory for the exploratory plots.
SAMPLE_LEN = 100

# Absolute locations of the image folder and the CSV tables.
IMAGE_PATH = "D:\\Coding_practice\\plan_pathology_kaggle\\images\\"
TRAIN_PATH = "D:\\Coding_practice\\plan_pathology_kaggle\\train.csv"
TEST_PATH = "D:\\Coding_practice\\plan_pathology_kaggle\\test.csv"
SUB_PATH = "D:\\Coding_practice\\plan_pathology_kaggle\\sample_submission.csv"

# Read the three CSV tables into DataFrames.
train_data, test_data, sub = (
    pd.read_csv(csv_path) for csv_path in (TRAIN_PATH, TEST_PATH, SUB_PATH)
)

In [None]:
# Preview the first rows of the training table.
train_data.head()

In [None]:
# Preview the first rows of the test table.
test_data.head()

In [None]:
def load_image(image_id):
    """Read one image from disk and return it in RGB channel order.

    Args:
        image_id: Image identifier; the file that is read is
            ``IMAGE_PATH + image_id + ".jpg"``.

    Returns:
        The decoded image, converted from OpenCV's BGR order to RGB.

    Raises:
        FileNotFoundError: If OpenCV could not read the file.
    """
    file_path = IMAGE_PATH + image_id + ".jpg"
    image = cv2.imread(file_path)
    # cv2.imread reports a missing/unreadable file by returning None rather
    # than raising, which would otherwise surface as an obscure cvtColor error.
    if image is None:
        raise FileNotFoundError("Could not read image: {}".format(file_path))
    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Positions of the rows labelled healthy; only referenced by the alternative
# loader that is kept commented out below.
sample_class = np.where(train_data['healthy'].eq(1))
# train_images = train_data['image_id'][list(sample_class)[0][:100]].progress_apply(load_image)

# Load the first SAMPLE_LEN images, showing a tqdm progress bar.
train_images = (
    train_data['image_id']
    .iloc[:SAMPLE_LEN]
    .progress_apply(load_image)
)

In [None]:
# Inspect the index labels of the loaded image Series.
train_images.index
# train_images[18]

# Visualize one leaf

In [None]:
# Pick one loaded leaf at random, resize it to 205x136 and render it.
leaf = train_images[random.choice(train_images.index)]
thumbnail = cv2.resize(leaf, (205, 136))
fig = px.imshow(thumbnail)
fig.show()

# Channel distribution

In [None]:
# Per-image mean intensity of channel 0, 1 and 2 (R, G, B after the
# BGR->RGB conversion done in load_image).
red_values, green_values, blue_values = (
    [np.mean(train_images[idx][:, :, channel]) for idx in train_images.index]
    for channel in range(3)
)

# Per-image mean over every pixel and channel.
values = [np.mean(train_images[idx]) for idx in train_images.index]

# All channel values

In [None]:
# Distribution plot of each image's overall mean pixel value.
fig = ff.create_distplot([values], group_labels=["Channels"], colors=["purple"])
fig.update_layout(
    showlegend=False,
    template="simple_white",
    title_text="Distribution of channel values",
)
# Outline the first trace's markers in black.
fig.data[0].marker.line.color = 'rgb(0,0,0)'
fig.data[0].marker.line.width = 1
fig

In [None]:
# Distribution plot of the per-image red-channel means.
fig = ff.create_distplot([red_values], group_labels=["R"], colors=["red"])
fig.update_layout(
    showlegend=False,
    template="simple_white",
    title_text="Distribution of red channel values",
)
# Outline the first trace's markers in black.
fig.data[0].marker.line.color = 'rgb(0,0,0)'
fig.data[0].marker.line.width = 1
fig

In [None]:
# Distribution plot of the per-image green-channel means.
fig = ff.create_distplot([green_values], group_labels=["G"], colors=["green"])
fig.update_layout(
    showlegend=False,
    template="simple_white",
    title_text="Distribution of green channel values",
)
# Outline the first trace's markers in black.
fig.data[0].marker.line.color = 'rgb(0,0,0)'
fig.data[0].marker.line.width = 1
fig

In [None]:
# Distribution plot of the per-image blue-channel means.
fig = ff.create_distplot([blue_values], group_labels=["B"], colors=["blue"])
fig.update_layout(
    showlegend=False,
    template="simple_white",
    title_text="Distribution of blue channel values",
)
# Outline the first trace's markers in black.
fig.data[0].marker.line.color = 'rgb(0,0,0)'
fig.data[0].marker.line.width = 1
fig

In [None]:
# One box per colour channel, built from the per-image channel means.
fig = go.Figure()

# The loop variable is deliberately NOT called `values`: that name holds the
# module-level list of overall means and must not be overwritten here.
channel_series = zip(
    ("Red", "Green", "Blue"),
    (red_values, green_values, blue_values),
)
for color, channel_means in channel_series:
    fig.add_trace(
        go.Box(
            x=[color] * len(channel_means),
            y=channel_means,
            name=color,
            marker=dict(color=color.lower()),
        )
    )

fig.update_layout(yaxis_title="Mean value", xaxis_title="Color channel", title="Mean value vs. Color channel",template="plotly_white")

In [None]:
# Overlaid distribution plots of the per-image R, G and B channel means.
fig = ff.create_distplot(
    [red_values, green_values, blue_values],
    group_labels=["R", "G", "B"],
    colors=["red", "green", "blue"],
)
fig.update_layout(
    showlegend=False,
    template="simple_white",
    title_text="Distribution of all channel values",
)
# Outline the markers of the first three traces in black.
for trace in fig.data[:3]:
    trace.marker.line.color = 'rgb(0,0,0)'
    trace.marker.line.width = 1
fig

# Visualize sample leaves

In [None]:
def visualize_leaves(cond=(0, 0, 0, 0), cond_cols=("healthy",), is_cond=True):
    """Show up to nine loaded leaf images in a three-column grid.

    Args:
        cond: Four label values, in the order healthy, scab, rust,
            multiple_diseases.
        cond_cols: Names of the label columns to filter on. A row is kept
            when every named column equals its entry in ``cond``. Names
            other than the four label columns are ignored.
        is_cond: When False, no filtering is done and the loaded images are
            shown starting from the last one and walking backwards.

    Returns:
        None. The figure is displayed with ``plt.show()``; if no image
        qualifies, a message is printed instead.
    """
    n_cols, max_rows = 3, 3
    max_images = n_cols * max_rows

    if is_cond:
        healthy, scab, rust, multiple_diseases = cond
        wanted = {
            "healthy": healthy,
            "scab": scab,
            "rust": rust,
            "multiple_diseases": multiple_diseases,
        }
        # Only rows with a loaded image can be displayed, so filter within
        # the labels of train_images rather than a hard-coded row range.
        data = train_data.loc[train_images.index]
        for col in cond_cols:
            if col in wanted:
                data = data.query("{} == {}".format(col, wanted[col]))
        labels = data.index[:max_images]
    else:
        labels = train_images.index[::-1][:max_images]

    shown = [train_images.loc[label] for label in labels]
    if not shown:
        print("No loaded images match the requested condition.")
        return None

    # Ceiling division: a partially filled last row is still displayed.
    n_rows = -(-len(shown) // n_cols)
    # squeeze=False keeps `ax` two-dimensional even when there is one row.
    fig, ax = plt.subplots(
        nrows=n_rows, ncols=n_cols, figsize=(30, n_rows * 20 / 3), squeeze=False
    )
    axes = ax.ravel()  # row-major order: left to right, then top to bottom
    for axis, image in zip(axes, shown):
        axis.imshow(image)
    for axis in axes[len(shown):]:
        axis.axis("off")  # hide the unused cells of the last row
    plt.show()
    return None

In [None]:
# Show loaded leaves whose "scab" label equals 1.
visualize_leaves(cond=[0, 1, 0, 0], cond_cols=["scab"])

In [None]:
# Parallel-categories view of the four label columns, coloured by "healthy".
fig = px.parallel_categories(
    train_data[["healthy", "scab", "rust", "multiple_diseases"]],
    color='healthy',
    color_continuous_scale="sunset",
    title="Parallel categories plot of targets",
)
fig

In [None]:
# Pie chart of the column sums of every column after the first one.
label_totals = train_data.iloc[:, 1:].sum()
pie = go.Pie(labels=train_data.columns[1:], values=label_totals.values)
fig = go.Figure([pie])

fig.update_layout(title_text="Pie chart of targets", template='simple_white')
# Thin black outline around each slice.
outline = fig.data[0].marker.line
outline.color = 'rgb(0,0,0)'
outline.width = 0.5
fig.show()

In [None]:
# List the column names after the first column.
train_data.columns.values[1:]

In [None]:
# Add 'True'/'False' string copies of the label columns so Plotly treats
# them as discrete categories.
# NOTE(review): "Scrab" looks like a typo for "Scab"; the column name is kept
# unchanged in case other cells read it - confirm before renaming.
label_columns = (
    ("Healthy", "healthy"),
    ("Multiple_diseases", "multiple_diseases"),
    ("Rust", "rust"),
    ("Scrab", "scab"),
)
for new_col, src_col in label_columns:
    train_data[new_col] = train_data[src_col].apply(bool).apply(str)

# Number of rows whose "Healthy" value is 'True'.
print(np.sum(train_data["Healthy"].values == 'True'))

# Histogram of the "Rust" category, one colour per category value.
fig = px.histogram(
    train_data,
    x="Rust",
    title="Data distribution",
    color="Rust",
    color_discrete_map={
        "True": px.colors.qualitative.Plotly[0],
        "False": px.colors.qualitative.Plotly[1],
    },
)
fig.update_layout(template="simple_white")
# Style every trace that exists instead of indexing traces 0 and 1, which
# fails when only one category value is present in the data.
for trace in fig.data:
    trace.marker.line.color = 'rgb(0, 0, 0)'
    trace.marker.line.width = 0.5
fig.show()

In [None]:
def edge_and_cut(img):
    """Plot an image, its Canny edge map and the bounding box of the edges.

    The bounding box spans the smallest and largest row/column at which the
    Canny edge map is non-zero, and is drawn in red on a copy of the image.
    If no edge pixel is found, the third panel shows the image without a box.

    Args:
        img: Image array passed to ``cv2.Canny``. Drawing the box assigns
            ``[255, 0, 0]`` per pixel, so a 3-channel image is required;
            presumably RGB uint8 as returned by load_image - confirm.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    edges = cv2.Canny(img, 100, 200)
    emb_img = img.copy()

    # Vectorised lookup of edge pixels instead of a per-pixel Python loop.
    edge_rows, edge_cols = np.nonzero(edges)
    if edge_rows.size:
        row_min, row_max = int(edge_rows.min()), int(edge_rows.max())
        col_min, col_max = int(edge_cols.min()), int(edge_cols.max())

        half_width = 10
        red = [255, 0, 0]

        def band_start(center):
            # Clamp at 0: a negative slice start would count from the far
            # end of the axis and silently select nothing.
            return max(center - half_width, 0)

        emb_img[band_start(row_min):row_min + half_width, col_min:col_max] = red
        emb_img[band_start(row_max):row_max + half_width, col_min:col_max] = red
        emb_img[row_min:row_max, band_start(col_min):col_min + half_width] = red
        emb_img[row_min:row_max, band_start(col_max):col_max + half_width] = red

    fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(30, 20))
    ax[0].imshow(img, cmap='gray')
    ax[0].set_title('Original Image', fontsize=24)
    ax[1].imshow(edges, cmap='gray')
    ax[1].set_title('Canny Edges', fontsize=24)
    ax[2].imshow(emb_img, cmap='gray')
    ax[2].set_title('Bounding Box', fontsize=24)
    plt.show()

In [None]:
# Run the edge/bounding-box visualisation on a randomly chosen loaded image.
edge_and_cut(train_images[random.choice(train_images.index)])

In [None]:
def channels_visualize(img):
    """Show channels 0, 1 and 2 of an image side by side with the image itself.

    Args:
        img: Array indexable as ``img[:, :, c]`` for c in 0..2; the panels
            are titled red, green and blue in that order.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    process_img = img.copy()
    channel_titles = (
        'Red channel Image',
        'Green channel Image',
        'Blue channel Image',
    )

    fig, ax = plt.subplots(nrows=1, ncols=4, figsize=(30, 20))
    # First three panels: one single-channel slice each.
    for channel, title in enumerate(channel_titles):
        ax[channel].imshow(process_img[:, :, channel])
        ax[channel].set_title(title, fontsize=24)

    # Last panel: the full image.
    ax[3].imshow(process_img, cmap='gray')
    ax[3].set_title('Original Image', fontsize=24)
    plt.show()

In [None]:
# Show the channel breakdown of a randomly chosen loaded image.
channels_visualize(train_images[random.choice(train_images.index)])

In [None]:
def conv(img, ksize=7):
    """Show an image next to its mean-filtered (box-convolved) version.

    Args:
        img: Image array passed to ``cv2.filter2D``.
        ksize: Side length of the square averaging kernel.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Normalise by the number of kernel taps so the weights sum to 1; a 7x7
    # kernel divided by 25 would sum to 1.96 and brighten/saturate the image.
    kernel = np.ones((ksize, ksize), np.float32) / (ksize * ksize)
    convolved = cv2.filter2D(img, -1, kernel)

    fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(20, 20))
    ax[0].imshow(img)
    ax[0].set_title('Original Image', fontsize=24)
    ax[1].imshow(convolved)
    ax[1].set_title('Convolved Image', fontsize=24)
    plt.show()

In [None]:
# Show the convolution result for a randomly chosen loaded image.
conv(train_images[random.choice(train_images.index)])

In [None]:
def blur(img):
    """Show an image next to a copy blurred with a 25x25 ``cv2.blur`` kernel.

    Args:
        img: Image array passed to ``cv2.blur`` and ``imshow``.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    panels = (
        ('Original Image', img),
        ('Blurred Image', cv2.blur(img, (25, 25))),
    )
    fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(20, 20))
    for axis, (title, picture) in zip(ax, panels):
        axis.imshow(picture)
        axis.set_title(title, fontsize=24)
    plt.show()

In [None]:
# Show the blur result for a randomly chosen loaded image.
blur(train_images[random.choice(train_images.index)])