# Calculation of image quality

This notebook pulls the `ImageId` of each image from MongoDB, fetches the associated image from the image server, and calculates various quality metrics for it. It then builds a dataframe holding the image IDs and their quality scores, and pushes the new information back to MongoDB.

In [1]:
import io, os, sys
import requests
import random
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from collections import Counter
from ipywidgets import IntProgress
from IPython.display import display

import skimage as ski
import cv2
import torch
import piq
#import nvidia_smi

sys.path.append('..')

# local library of functions to connect to image server
import calcimetry.use_server as server
from calcimetry.mongo_api import MongoInfo, MongoAPI

In [2]:
# Initialise the image-server helper (it prints the server URL); the returned
# value is passed to server.get_list() further down to enumerate the image files.
img_path = server.init()
# Configuration object handed to MongoAPI(mongo_info=...) when the database is opened.
mongo_info = MongoInfo()

server : http://imgserver.10.68.0.250.nip.io/data/andra


In [3]:
# Load every document of the 'images' collection into a dataframe. The query
# result is turned into a list inside the `with` block, i.e. before the
# MongoAPI context is exited.
with MongoAPI(mongo_info=mongo_info) as mongo_api:
    images_collection = mongo_api.db['images']
    doc = images_collection.find()
    records = list(doc)
    df = pd.DataFrame(records)
df

Unnamed: 0,_id,ImageId,FileName,DrillName,Cote0,Cote1,PxSize,PySize,px0,px1,k_Up,k_Down,k_Arrow
0,632424f13f00570ad37de897,1437,GTR2020-02_0003-OHZ5310_0000_0108.JPG,OHZ5310,0,108,19113,4038,102.0,19073.0,"[[10578, 1263], [10624, 1256], [10860, 1269], ...","[[10542, 2458], [10823, 2360], [11250, 2495], ...","[[10719, 1731], [17861, 1619], [19092, 1667]]"
1,632424f13f00570ad37de898,1438,GTR2020-02_0003-OHZ5310_0078_0199.JPG,OHZ5310,78,199,5158,1019,5.0,5123.0,"[[4, 302], [601, 289], [1553, 300], [3524, 264...","[[8, 599], [378, 604], [974, 586], [1442, 586]...","[[6, 426], [3519, 413], [4531, 417], [5124, 386]]"
2,632424f13f00570ad37de899,1439,GTR2020-02_0003-OHZ5310_0170_0291.JPG,OHZ5310,170,291,5175,1005,18.0,5168.0,"[[11, 279], [653, 273], [1266, 268], [1680, 28...","[[18, 599], [616, 611], [1163, 609], [1256, 60...","[[15, 421], [1039, 417], [1108, 399], [1497, 4..."
3,632424f13f00570ad37de89a,1440,GTR2020-02_0003-OHZ5310_0261_0383.JPG,OHZ5310,261,383,5176,1009,31.0,5169.0,"[[2274, 388], [2387, 339], [2525, 351], [2721,...","[[2226, 615], [2320, 632], [2721, 634], [2989,...","[[2280, 481], [3015, 444], [3936, 429], [4472,..."
4,632424f13f00570ad37de89b,1441,GTR2020-02_0003-OHZ5310_0349_0431.JPG,OHZ5310,349,431,3638,1014,33.0,3540.0,"[[5, 284], [125, 281], [660, 290], [802, 331],...","[[2, 613], [82, 611], [441, 619], [856, 610], ...","[[26, 456], [1314, 470], [1386, 462], [1617, 4..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3063,6324255b3f00570ad37e048f,1267,GTR2019-10_0022_OHZ2023_0259_0381.JPG,OHZ2023,259,381,5181,947,6.0,5171.0,"[[5, 276], [1823, 282], [2428, 279], [2492, 28...","[[3, 532], [933, 538], [1835, 548], [2444, 544...","[[5, 410], [1831, 417], [2943, 414], [4059, 42..."
3064,6324255b3f00570ad37e0490,1268,GTR2019-10_0022_OHZ2023_0347_0402.JPG,OHZ2023,347,402,5184,978,38.0,2353.0,"[[3, 302], [534, 307], [1782, 319], [2316, 323]]","[[6, 557], [835, 568], [2166, 583], [2288, 564]]","[[6, 422], [1046, 432], [1891, 426], [2289, 438]]"
3065,6324255c3f00570ad37e0496,1228,GTR2015-11_0028_OHZ2004_0000_0100.JPG,OHZ2004,0,100,2931,600,25.0,2912.0,"[[54, 151], [137, 159], [331, 157], [345, 144]...","[[76, 309], [117, 327], [481, 326], [607, 325]...","[[53, 246], [776, 245], [2550, 243], [2924, 240]]"
3066,6324255c3f00570ad37e0497,1229,GTR2015-11_0028_OHZ2004_0073_0192.JPG,OHZ2004,73,192,3456,612,27.0,3453.0,"[[414, 174], [760, 169], [774, 160], [1965, 15...","[[420, 342], [764, 345], [782, 333], [1301, 33...","[[361, 257], [2201, 235], [3451, 234]]"


### There is no consistency in directory names

So we get the list of all the files available on the image server and look each image up in that list.

In [4]:
# List the files the image server exposes under img_path; this listing is
# searched later to resolve each database FileName to a path on the server.
files = server.get_list(img_path)
print("nombre de fichiers disponibles", len(files))  # French: "number of files available"

Status Code 200
nombre de fichiers disponibles 2268


In [5]:
# Peek at the first entry to see what the paths returned by the server look like.
files[0]

'/data/andra/calci_photos/REP2104/Photos/GTR2004-12_0017_REP2104_0065_0161.jpg'

This only returns a list of files with the extension `.jpg` and not `.JPG`

Function to find string in list of strings (thanks stackoverflow)

In [6]:
def index_containing_substring(the_list, substring):
    """Find the first string in ``the_list`` that contains ``substring``.

    Parameters
    ----------
    the_list : iterable of str
        Strings to search, in order.
    substring : str
        Text to look for inside each string.

    Returns
    -------
    int
        Position of the first matching string, or -1 when nothing matches.
    """
    matching_positions = (
        position
        for position, candidate in enumerate(the_list)
        if substring in candidate
    )
    # next() stops at the first hit; the default covers the "no match" case.
    return next(matching_positions, -1)

### The variance of the Laplacian can be a measure of the sharpness of the image, or the focus

In [7]:
def variance_of_laplacian(image):
    """Return the variance of the Laplacian of ``image``.

    The Laplacian is computed by OpenCV with 64-bit float output, and its
    variance is used in this notebook as the focus / sharpness score.
    """
    laplacian = cv2.Laplacian(image, cv2.CV_64F)
    return laplacian.var()

### Magnitude of the gradient to get sharpness of edges

In [8]:
def gradient_magnitude(image):
    """Return the per-pixel gradient magnitude of ``image``.

    First-order Sobel derivatives are taken along x and along y with 64-bit
    float output, then combined element-wise with ``cv2.magnitude``.
    """
    output_depth = cv2.CV_64F
    sobel_x = cv2.Sobel(image, output_depth, 1, 0)  # derivative order 1 in x, 0 in y
    sobel_y = cv2.Sobel(image, output_depth, 0, 1)  # derivative order 0 in x, 1 in y
    return cv2.magnitude(sobel_x, sobel_y)

## Colour analysis
https://towardsdatascience.com/building-an-image-color-analyzer-using-python-12de6b0acf74

* First, we use k-Means to cluster the top colors. We pass the number of clusters we want to the `KMeans` constructor (see the scikit-learn documentation for K-Means clustering: https://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html). After clustering, we predict the colors that weigh the most — meaning those covering the most area of the image.
* Secondly, we call `Counter`. A `Counter` stores the elements as dictionary keys and the number of times each one occurs as the dictionary values. If you are not familiar with dictionaries, they store data in key: value pairs; they work like a function: when you pass in the “key”, you get the “value” back. Finally, we look up the centre color of each cluster label held in the counter.

In [9]:
def prep_image(raw_img, size=(900, 600)):
    """Downscale an image and flatten it into a list of pixels for clustering.

    Parameters
    ----------
    raw_img : array-like image accepted by ``cv2.resize``
        Must have three colour channels; any other channel count makes the
        final reshape raise ``ValueError``.
    size : tuple of int, optional
        Target ``(width, height)`` passed to ``cv2.resize``. Defaults to
        ``(900, 600)``, the value that used to be hard-coded.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(width * height, 3)`` with one row per pixel.
    """
    # INTER_AREA resampling is kept from the original implementation.
    resized = cv2.resize(raw_img, tuple(size), interpolation=cv2.INTER_AREA)
    n_rows, n_cols = resized.shape[:2]
    # Explicit rows * cols (rather than -1) so that non-3-channel input
    # still fails loudly instead of being silently re-interpreted.
    return resized.reshape(n_rows * n_cols, 3)

def color_analysis(img, n_colors=5, random_state=0):
    """Cluster the pixels of an image and return its dominant colours.

    Parameters
    ----------
    img : array-like of shape (n_pixels, 3)
        Flattened pixel list, as produced by ``prep_image``.
    n_colors : int, optional
        Number of k-means clusters, i.e. the maximum number of colours
        returned. Defaults to 5.
    random_state : int or None, optional
        Seed forwarded to ``KMeans`` so that repeated runs give the same
        clusters. Pass ``None` `` for unseeded behaviour.

    Returns
    -------
    list of numpy.ndarray
        Cluster-centre colours ordered from the cluster holding the most
        pixels to the one holding the fewest. Only clusters that received at
        least one pixel are included, so the list can be shorter than
        ``n_colors``.
    """
    clf = KMeans(n_clusters=n_colors, random_state=random_state)
    color_labels = clf.fit_predict(img)
    center_colors = clf.cluster_centers_
    counts = Counter(color_labels)
    # most_common() sorts by pixel count (descending); plain iteration over the
    # Counter would instead follow the order in which labels were first seen.
    return [center_colors[label] for label, _ in counts.most_common()]

### From the dataframe calculate some metrics

Add some Facebook metrics too: `piq` PyTorch Image Quality
* https://github.com/photosynthesis-team/piq/blob/master/examples/image_metrics.py

In [None]:
# Per-image metric columns. Every list below (except Resolution, which this
# loop does not fill) receives exactly one entry per row of df, in row order,
# with NaN wherever a value could not be computed, so the lists stay aligned
# with the dataframe.
Resolution = []
Focus = []
GradientMax = []
GradientSTD = []
Colour1 = []
Colour2 = []
Colour3 = []
Colour4 = []
Colour5 = []
BRISQUE_i = []
BRISQUE_l = []

missing_images = []

# The five colour lists, in rank order, so they can be filled in one go.
colour_columns = (Colour1, Colour2, Colour3, Colour4, Colour5)

# BRISQUE scoring with piq is switched off by default; set to True to compute
# it. While it is off, NaN placeholders are stored so the BRISQUE lists keep
# the same length as the other metric lists.
COMPUTE_BRISQUE = False

for fileName, drillname in zip(df['FileName'], df['DrillName']):

    # Resolve the database file name to a path on the server. The lookup uses
    # the name without its extension because the server listing and the
    # database do not agree on the extension's case (.jpg vs .JPG).
    index = index_containing_substring(files, os.path.splitext(fileName)[0])
    if index != -1:
        filename = files[index]
    else:
        # Not in the listing: fall back to the conventional directory layout.
        filename = f'/data/andra/calci_photos/{drillname}/Photos/{fileName}'

    img = server.get_file(filename, quiet=True)
    if img is None:  # not an image
        print(f'{fileName} is not a jpg')
        missing_images.append(fileName)
        Focus.append(np.nan)
        GradientMax.append(np.nan)
        GradientSTD.append(np.nan)
        for column in colour_columns:
            column.append(np.nan)
        BRISQUE_i.append(np.nan)
        BRISQUE_l.append(np.nan)
        continue

    pixels = np.asarray(img)

    # focus metric
    # NOTE(review): COLOR_BGR2GRAY assumes BGR channel order; if
    # server.get_file returns an RGB image, COLOR_RGB2GRAY would be the
    # matching conversion - confirm.
    gray = cv2.cvtColor(pixels, cv2.COLOR_BGR2GRAY)
    Focus.append(variance_of_laplacian(gray))

    # Gradient metric (computed once, summarised twice)
    gradient = gradient_magnitude(gray)
    GradientMax.append(np.max(gradient))
    GradientSTD.append(np.std(gradient))

    # Top five colours. The values are gathered first and appended afterwards
    # so that a failure can never leave the colour lists partially filled.
    try:
        colours = list(color_analysis(prep_image(pixels)))[:len(colour_columns)]
    except Exception as err:
        print(f'Colour analysis failed for {fileName}: {err}')
        colours = []
    # Fewer colours than columns (or a failure): pad the remainder with NaN.
    colours += [np.nan] * (len(colour_columns) - len(colours))
    for column, colour in zip(colour_columns, colours):
        column.append(colour)

    # pytorch image quality; errors are caught so the loop keeps going if an
    # image is too large or does not conform
    brisque_index_value = np.nan
    brisque_loss_value = np.nan
    if COMPUTE_BRISQUE:
        x = None
        try:
            x = torch.tensor(pixels).permute(2, 0, 1)[None, ...] / 255.
            if torch.cuda.is_available():
                # Move to GPU to make computations faster
                x = x.cuda()
            brisque_index = piq.brisque(x, data_range=1., reduction='none')
            brisque_loss = piq.BRISQUELoss(data_range=1., reduction='none')(x)
            brisque_index_value = brisque_index.item()
            brisque_loss_value = brisque_loss.item()
        except Exception as e:
            print(f'Error in PyTorch with image {fileName}\n')
            print(e)
            brisque_index_value = np.nan
            brisque_loss_value = np.nan
        finally:
            # Drop the tensor reference before asking CUDA to release memory.
            x = None
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
    BRISQUE_i.append(brisque_index_value)
    BRISQUE_l.append(brisque_loss_value)



In [None]:
# resolution: pixel span of each image divided by its Cote span
# (presumably pixels per unit of depth - confirm the unit of Cote0/Cote1)
px_span = df['px1'] - df['px0']
cote_span = df['Cote1'] - df['Cote0']
dpxdx = px_span / cote_span

In [None]:
# Sanity check: display the last image fetched by the metrics loop.
# `img` is None when that last file could not be retrieved, and
# plt.imshow(None) would raise, so only plot when an image is available.
if img is not None:
    fig, ax = plt.subplots()
    ax.imshow(img)
    ax.set_title(fileName)
    plt.show()
else:
    print(f'No image to display: {fileName} could not be retrieved')