# Libraries

In [None]:
from PIL import Image
import numpy as np
from matplotlib import pyplot as plt
import cv2
import json
from glob import glob
import os
import tqdm
from collections import defaultdict
import pandas as pd
from utils import read_img

# Metadata

In [None]:
# Image file extension (presumably for the image/mask files processed in this
# notebook; it is not referenced anywhere in the visible code).
# NOTE(review): this name shadows the built-in `format()`.
format = '.png'

In [None]:
def load_json(file_path):
    """Parse the UTF-8 encoded JSON file at ``file_path`` and return its content.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The deserialised JSON value (whatever ``json.load`` produces).
    """
    with open(file_path, mode='r', encoding='utf-8') as handle:
        return json.load(handle)

In [None]:
# Absolute, machine-specific paths to the two metadata JSON files.
colors_file = r"C:\Users\Viktor\Documents\IT\ReservoirRockAnalysis\src\metadata\rgb_colors.json"
porosty_file = r"C:\Users\Viktor\Documents\IT\ReservoirRockAnalysis\src\metadata\porosty_info.json"

# Parse both files. Later cells look up `colors[c['color']]` and read
# `porosty[0]['classes']`.
colors = load_json(colors_file)
porosty = load_json(porosty_file)

In [None]:
# NOTE(review): this cell repeats the preceding metadata cell verbatim;
# running it only reloads the same two JSON files into the same names.
colors_file = r"C:\Users\Viktor\Documents\IT\ReservoirRockAnalysis\src\metadata\rgb_colors.json"
porosty_file = r"C:\Users\Viktor\Documents\IT\ReservoirRockAnalysis\src\metadata\porosty_info.json"

colors = load_json(colors_file)
porosty = load_json(porosty_file)

In [None]:
# Show the parsed content of rgb_colors.json.
colors

In [None]:
# Show the parsed content of porosty_info.json.
porosty

In [None]:
# For every class of the first entry in `porosty`, pair the class's colour
# (looked up in `colors` by the class's 'color' key and wrapped in an array)
# with the class's integer 'index'. convert2mask() iterates over this list.
indexed_colors = [{'color_value': np.array(colors[c['color']]),
                  'index': c['index']}
                    for c in porosty[0]['classes']]
indexed_colors

In [None]:
# Number of classes declared in the first entry of `porosty`.
NUM_CLASSES = len(porosty[0]['classes'])

# Processing functions

In [None]:
# NOTE(review): not referenced anywhere in the visible code. Presumably the set
# of channel values expected after the 0/255 thresholding in convert2rgb();
# confirm before relying on it.
unique = {0, 255}

def convert2rgb(image):
    """Binarise an RGBA image and return it as an RGB ``uint8`` array.

    Every channel (alpha included) is thresholded at 128: values below 128
    become 0, all other values become 255. Pixels whose thresholded alpha is 0
    are blanked to black, and the alpha channel is then dropped.

    Args:
        image: A PIL image or an array-like of shape (H, W, 4) in RGBA order.
            PIL images in any other mode (RGB, L, P, ...) are converted to
            RGBA first; without that conversion the alpha lookup raises
            ``IndexError`` for them.

    Returns:
        numpy.ndarray: C-contiguous (H, W, 3) ``uint8`` array whose values are
        only 0 or 255.
    """
    # Duck-typed PIL check: ndarrays have no ``convert`` method, PIL images do.
    if hasattr(image, 'convert'):
        image = image.convert('RGBA')

    rgba = np.asarray(image)
    binary = np.where(rgba < 128, 0, 255).astype(np.uint8)

    # Broadcast the (H, W) opacity mask over the colour channels instead of
    # stacking four copies of it.
    opaque = binary[:, :, 3] != 0
    rgb = np.where(opaque[:, :, np.newaxis], binary[:, :, :3], 0)

    # RGBA -> RGB is a plain channel drop (no alpha blending), so slicing off
    # the fourth channel yields the same pixels as cv2.COLOR_RGBA2RGB.
    return np.ascontiguousarray(rgb, dtype=np.uint8)

In [None]:
def convert2mask(image, palette=None):
    """Turn a colour-coded image into a 2-D array of class indices.

    Each pixel whose channel values all equal an entry's ``'color_value'`` is
    assigned that entry's ``'index'``. Pixels that match no entry keep the
    initial value 0. When several entries match the same pixel, the entry
    that comes last in ``palette`` wins.

    Args:
        image: Array-like of shape (H, W, C); ``C`` must be comparable with
            the length of each ``'color_value'``.
        palette: Optional iterable of dicts with the keys ``'color_value'``
            and ``'index'``. Defaults to the module-level ``indexed_colors``,
            which keeps existing one-argument calls working unchanged.

    Returns:
        numpy.ndarray: (H, W) array of class indices.
    """
    if palette is None:
        # Resolved at call time so the notebook-level list can be rebuilt
        # between calls.
        palette = indexed_colors

    image = np.asarray(image)
    mask = np.zeros_like(image[:, :, 0])

    for entry in palette:
        # True where every channel of the pixel equals the entry's colour.
        matches = np.all(image == entry['color_value'], axis=-1)
        mask = np.where(matches, entry['index'], mask)
    return mask

# Pipeline example

In [None]:
# Open one example file from an `image_masks` folder with PIL and display it.
im = Image.open(r"C:\Users\Viktor\Documents\IT\ReservoirRockAnalysis\data\Имидж_АиВТ\Ю.Сюрхар\image_masks\351348_4.png")
im

In [None]:
# Run the example through convert2rgb() and plot the result on a 20x20 figure.
img = convert2rgb(im)
plt.figure(figsize=(20, 20))
plt.imshow(img);

In [None]:
# Convert the colours to class indices. For plotting, the indices are
# multiplied by 255 // 5 == 51 -- presumably to spread class indices 0..5 over
# the colormap; TODO confirm that 5 is the largest class index.
mask = convert2mask(img)
plt.imshow(mask * (255 // 5), cmap='viridis');

In [None]:
# Build a PIL image from the mask with every index multiplied by 10
# (presumably so the classes are distinguishable when viewed), then display it.
mask_im = Image.fromarray(mask * 10)
mask_im

In [None]:
# Write the scaled mask to test.png (relative path, i.e. the current working directory).
mask_im.save('test.png')

In [None]:
# Round-trip check: read test.png back, convert it to an array and show its shape.
mask_readed = Image.open("test.png")
mask_readed = np.array(mask_readed)
mask_readed.shape

In [None]:
# List the distinct values present in the re-read mask.
np.unique(mask_readed)

# Images processing

In [None]:
# Maps each `image_masks` source folder (key) to the `masks` folder (value)
# that the commented-out batch loop in the next cell would save into.
image_mask_and_mask_paths = {
    r"C:\Users\Viktor\Documents\IT\ReservoirRockAnalysis\data\Имидж_АиВТ\Ю.Сюрхар\image_masks":
    r"C:\Users\Viktor\Documents\IT\ReservoirRockAnalysis\data\Имидж_АиВТ\Ю.Сюрхар\masks",

    r"C:\Users\Viktor\Documents\IT\ReservoirRockAnalysis\data\Имидж_АиВТ\Сихор_32101\image_masks":
    r"C:\Users\Viktor\Documents\IT\ReservoirRockAnalysis\data\Имидж_АиВТ\Сихор_32101\masks"
}

In [None]:
# Disabled batch conversion, kept commented out: for every
# (image_masks folder, masks folder) pair it would convert each *.png with
# convert2rgb() + convert2mask() and save the result under the same file name
# in the masks folder, stopping the inner loop at the first failing image.
# NOTE(review): the assert checks `image_mask_path` but its message prints
# `mask_path`; fix the message if this loop is re-enabled.
# for image_mask_path, mask_path in image_mask_and_mask_paths.items():

#     assert os.path.exists(image_mask_path), f"Path {mask_path} does not exist"

#     paths = glob(image_mask_path + r'\*.png')
#     for path in tqdm.tqdm(paths, total=len(paths)):
#         image_name = os.path.basename(path)
#         try:
#             mask = convert2mask(
#                 convert2rgb(
#                     Image.open(path)
#                 )
#             )
#             mask = Image.fromarray(mask)
#         except Exception as e:
#             print(f'Error "{e}" with image {image_name}')
#             break
#         mask.save(mask_path + '\\' + image_name)

# Masks Analysis

In [None]:
# Collect every *.png mask from the Sihor and Surhar `masks` folders and show
# how many were found.
# NOTE(review): these folders live under data\Images, not under the
# data\Имидж_АиВТ folders listed in `image_mask_and_mask_paths` -- confirm
# this is the intended location.
mask_paths = (
    glob(r"C:\Users\Viktor\Documents\IT\ReservoirRockAnalysis\data\Images\Sihor\masks\*.png")
    +
    glob(r"C:\Users\Viktor\Documents\IT\ReservoirRockAnalysis\data\Images\Surhar\masks\*.png")
)
len(mask_paths)

In [None]:
# Column order for the summary table: 'name', then one 'class{i}' column per
# class, then one 'class{i}Percent' column per class.
info_fields = ['name'] + [f'class{i}' for i in range(NUM_CLASSES)] + [f'class{i}Percent' for i in range(NUM_CLASSES)]
info_fields

In [None]:
# NOTE(review): in the visible code this module-level `info` is only passed to
# pd.DataFrame in the following cell; get_mask_info() uses its own local dict.
info = defaultdict(list)

In [None]:
# Display `info` as a DataFrame (empty unless `info` has been filled elsewhere).
pd.DataFrame(info)

In [None]:
def get_mask_info(mask_path: str) -> dict:
    """Report which classes occur in one mask file and how much of it each covers.

    Args:
        mask_path: Path of the mask image; it is loaded with
            ``read_img(mask_path, rgb=False)``.

    Returns:
        A dict holding ``'name'`` (the base name of ``mask_path``) and, for each
        class index below ``NUM_CLASSES``, ``'class{i}'`` (whether the class
        occurs at all) followed by ``'class{i}Percent'`` (number of matching
        elements as a percentage of height * width).
    """
    pixels = read_img(mask_path, rgb=False)
    rows, cols = pixels.shape[0], pixels.shape[1]
    total = rows * cols

    summary = {'name': os.path.basename(mask_path)}
    for idx in range(NUM_CLASSES):
        percent = (pixels == idx).sum() / total * 100
        # Presence flag first, percentage second, so the key order stays stable.
        summary.update({
            f'class{idx}': percent != 0,
            f'class{idx}Percent': percent,
        })
    return summary
    

In [None]:
# Compute the per-class summary for every mask path (with a tqdm progress bar)
# and display the resulting list of dicts.
masks_info = list()
for mask_path in tqdm.tqdm(mask_paths, total=len(mask_paths)):
    masks_info.append(get_mask_info(mask_path))
masks_info

In [None]:
# Names of the per-class presence columns.
classes_i = [f'class{i}' for i in range(NUM_CLASSES)]

# Build the table with columns ordered as in `info_fields`, then cast the
# presence flags to integers (0 / 1).
masks_info_df = pd.DataFrame(masks_info)[info_fields]
masks_info_df[classes_i] = masks_info_df[classes_i].astype(int)
masks_info_df

In [None]:
# Column means over every field except 'name'.
masks_info_df[info_fields[1:]].mean()

In [None]:
# Column standard deviations over every field except 'name'.
masks_info_df[info_fields[1:]].std()

In [None]:
# Rows (masks) in which both the 'class4' and 'class5' columns are non-zero.
masks_info_df.loc[(masks_info_df['class4'] != 0) & (masks_info_df['class5'] != 0)]

In [None]:
# Export the summary table to an Excel file in the metadata folder
# (absolute, machine-specific path).
masks_info_df.to_excel(r"C:\Users\Viktor\Documents\IT\ReservoirRockAnalysis\src\metadata\masks_info.xlsx")