In [1]:
# ! pip3 install --user pandas
# ! pip3 install --user numpy
# ! pip3 install --user seaborn
# ! pip3 install --user matplotlib==3.1.3
# ! pip3 install --user plotly

Load color analysis results into a single dataset.

In [2]:
import os
from os.path import join
from glob import glob
import csv

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
from matplotlib import pyplot as plt

# Sorted so that the row order of the combined dataset is reproducible;
# glob() returns matches in arbitrary, filesystem-dependent order.
images = sorted(glob('../data_sm/*.JPG'))
print("Input images:", len(images))

results = sorted(glob('../output_sm/*.csv'))
print("Result files:", len(results))

# Concatenate every result CSV into one table. The header row is taken from
# the first file that has one; the header rows of the other files are skipped.
# All values are kept as strings, exactly as csv.reader returns them.
headers = []
rows = []
for result in results:
    # newline='' is what the csv module asks for so that line breaks inside
    # quoted fields are parsed correctly.
    with open(result, 'r', newline='') as file:
        reader = csv.reader(file)
        header = next(reader, None)
        if header is None:
            # Completely empty file: nothing to load, and no header to adopt.
            continue
        if not headers:
            headers = header
        rows.extend(reader)

df = pd.DataFrame(rows, columns=headers)
print(df)

Input images: 9
Result files: 9
                         Image Plant      Hex                    R  \
0    10_14_19.Control.5V4B9759     0  #7e7c31  0.49411764705882355   
1    10_14_19.Control.5V4B9759     0  #d3c988   0.8274509803921568   
2    10_14_19.Control.5V4B9759     0  #a07a4d   0.6274509803921569   
3    10_14_19.Control.5V4B9759     0  #665525                  0.4   
4    10_14_19.Control.5V4B9759     0  #a99f50   0.6627450980392157   
..                         ...   ...      ...                  ...   
465   10_14_19.Calmag.5V4B9763     4  #58472f  0.34509803921568627   
466   10_14_19.Calmag.5V4B9763     5  #414432   0.2549019607843137   
467   10_14_19.Calmag.5V4B9763     5  #42432f  0.25882352941176473   
468   10_14_19.Calmag.5V4B9763     5  #625d40   0.3843137254901961   
469   10_14_19.Calmag.5V4B9763     5  #4a462e   0.2901960784313726   

                       G                    B    Freq  
0    0.48627450980392156  0.19215686274509805  160116  
1      0.788235

Extract treatment from image name.

In [3]:
# (keyword, label) pairs tested in this order against the lower-cased image
# name; the first keyword found decides the treatment.
TREATMENT_KEYWORDS = (('control', 'Control'), ('maxsea', 'MaxSea'), ('calmag', 'CalMag'))


def treatment_from_image(image_name):
    """Return the treatment label contained in ``image_name``, or NaN if none matches."""
    lowered = image_name.lower()
    for keyword, label in TREATMENT_KEYWORDS:
        if keyword in lowered:
            return label
    # np.nan rather than np.NaN: the capitalized alias was removed in NumPy 2.0.
    return np.nan


df['Treatment'] = df['Image'].map(treatment_from_image)
print(df)

                         Image Plant      Hex                    R  \
0    10_14_19.Control.5V4B9759     0  #7e7c31  0.49411764705882355   
1    10_14_19.Control.5V4B9759     0  #d3c988   0.8274509803921568   
2    10_14_19.Control.5V4B9759     0  #a07a4d   0.6274509803921569   
3    10_14_19.Control.5V4B9759     0  #665525                  0.4   
4    10_14_19.Control.5V4B9759     0  #a99f50   0.6627450980392157   
..                         ...   ...      ...                  ...   
465   10_14_19.Calmag.5V4B9763     4  #58472f  0.34509803921568627   
466   10_14_19.Calmag.5V4B9763     5  #414432   0.2549019607843137   
467   10_14_19.Calmag.5V4B9763     5  #42432f  0.25882352941176473   
468   10_14_19.Calmag.5V4B9763     5  #625d40   0.3843137254901961   
469   10_14_19.Calmag.5V4B9763     5  #4a462e   0.2901960784313726   

                       G                    B    Freq Treatment  
0    0.48627450980392156  0.19215686274509805  160116   Control  
1      0.788235294117647   

Drop rows with unknown treatment (TODO: ask Mason about unlabeled images).

In [4]:
# Drop every row that has a missing value in any column; rows whose image name
# matched no treatment keyword carry NaN in 'Treatment' and are removed here.
# Rebinding instead of inplace=True keeps the cell free of in-place mutation.
df = df.dropna(how='any')
print(df)

                         Image Plant      Hex                    R  \
0    10_14_19.Control.5V4B9759     0  #7e7c31  0.49411764705882355   
1    10_14_19.Control.5V4B9759     0  #d3c988   0.8274509803921568   
2    10_14_19.Control.5V4B9759     0  #a07a4d   0.6274509803921569   
3    10_14_19.Control.5V4B9759     0  #665525                  0.4   
4    10_14_19.Control.5V4B9759     0  #a99f50   0.6627450980392157   
..                         ...   ...      ...                  ...   
465   10_14_19.Calmag.5V4B9763     4  #58472f  0.34509803921568627   
466   10_14_19.Calmag.5V4B9763     5  #414432   0.2549019607843137   
467   10_14_19.Calmag.5V4B9763     5  #42432f  0.25882352941176473   
468   10_14_19.Calmag.5V4B9763     5  #625d40   0.3843137254901961   
469   10_14_19.Calmag.5V4B9763     5  #4a462e   0.2901960784313726   

                       G                    B    Freq Treatment  
0    0.48627450980392156  0.19215686274509805  160116   Control  
1      0.788235294117647   

Add columns for HSV color representation.

In [5]:
from colorsys import rgb_to_hsv, hsv_to_rgb

def to_hsv(row):
    """Convert the 'R', 'G' and 'B' entries of ``row`` to an ``[h, s, v]`` list.

    Each channel is passed through ``float()`` first, so numeric strings are
    accepted. The conversion itself is done by ``colorsys.rgb_to_hsv``.
    """
    red, green, blue = (float(row[channel]) for channel in ('R', 'G', 'B'))
    return list(rgb_to_hsv(red, green, blue))

# One [h, s, v] list per row, then transposed into three per-column tuples.
hsv_triples = df.apply(to_hsv, axis=1)
hue_column, saturation_column, value_column = zip(*hsv_triples)
df['H'], df['S'], df['V'] = hue_column, saturation_column, value_column
print(df)

                         Image Plant      Hex                    R  \
0    10_14_19.Control.5V4B9759     0  #7e7c31  0.49411764705882355   
1    10_14_19.Control.5V4B9759     0  #d3c988   0.8274509803921568   
2    10_14_19.Control.5V4B9759     0  #a07a4d   0.6274509803921569   
3    10_14_19.Control.5V4B9759     0  #665525                  0.4   
4    10_14_19.Control.5V4B9759     0  #a99f50   0.6627450980392157   
..                         ...   ...      ...                  ...   
465   10_14_19.Calmag.5V4B9763     4  #58472f  0.34509803921568627   
466   10_14_19.Calmag.5V4B9763     5  #414432   0.2549019607843137   
467   10_14_19.Calmag.5V4B9763     5  #42432f  0.25882352941176473   
468   10_14_19.Calmag.5V4B9763     5  #625d40   0.3843137254901961   
469   10_14_19.Calmag.5V4B9763     5  #4a462e   0.2901960784313726   

                       G                    B    Freq Treatment         H  \
0    0.48627450980392156  0.19215686274509805  160116   Control  0.162338   
1    

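Next, analyze the color distribution for each treatment. Two analyses are defined: a hue histogram in HSV space (the one currently run) and RGB-space k-means clustering (its call is currently commented out).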

In [48]:
from pprint import pprint
from collections import Counter

from scipy.cluster.vq import kmeans, kmeans2
import plotly.graph_objects as go
import plotly.express as px

def rgb2hex(color):
    """Format an ``(r, g, b)`` sequence as a ``#rrggbb`` hex string.

    Each channel is truncated with ``int()`` and clamped to 0..255 so the
    result is always exactly six hex digits. Without the clamp a value of 256
    would be rendered as three digits and a negative value would carry a sign.

    Args:
        color: indexable with at least three numeric entries (R, G, B).

    Returns:
        The color as a lowercase ``#rrggbb`` string.
    """
    channels = [min(255, max(0, int(channel))) for channel in (color[0], color[1], color[2])]
    return "#{:02x}{:02x}{:02x}".format(*channels)

def hue_to_rgb(hue):
    """Return the ``(r, g, b)`` floats for ``hue`` at saturation 0.7 and value 0.7."""
    red, green, blue = (float(channel) for channel in hsv_to_rgb(hue, 0.7, 0.7))
    return red, green, blue

def get_rgb(k):
    """Return an ``rgb(r,g,b)`` color string for the hue ``k`` given in degrees.

    ``k`` is divided by 360 to get the hue fraction passed to ``hue_to_rgb``;
    each returned channel is multiplied by 256 and truncated to an integer.
    """
    hue_fraction = float(k / 360)
    scaled = [int(channel * 256) for channel in hue_to_rgb(hue_fraction)]
    return "rgb({},{},{})".format(scaled[0], scaled[1], scaled[2])

def hsv_analysis(subset):
    """Show a polar bar chart of how the hues in ``subset`` are distributed.

    The hue column ``H`` (a fraction of the color wheel) is converted to whole
    degrees and assigned to one of 36 ten-degree bands, each identified by its
    center (5, 15, ..., 355). One bar is drawn per band; its length is the
    number of rows of ``subset`` whose hue falls into that band. Rows are
    counted once each, i.e. they are not weighted by any other column.

    Args:
        subset: DataFrame with a column ``H`` convertible to float.
    """
    band_centers = [(10 * k) + 5 for k in range(36)]

    # Truncate to whole degrees, as int(float(h) * 360) would.
    hue_degrees = (subset['H'].astype(float) * 360).astype(int)
    # Floor to the start of the enclosing 10-degree band, then shift to its
    # center. Rounding to the nearest ten instead would push hues such as
    # 15-19 into the next band and map 355-359 to a non-existent band 360.
    # The modulo wraps a hue of exactly 360 degrees back to 0.
    bands = ((hue_degrees % 360) // 10) * 10 + 5

    # Count rows per band, in ascending band order, with 0 for empty bands.
    counts = bands.value_counts().reindex(band_centers, fill_value=0)
    counts_df = pd.DataFrame({'band': band_centers, 'count': counts.to_numpy()})

    color_map = {center: get_rgb(center) for center in band_centers}

    # NOTE(review): 'band' is numeric, so plotly express may treat it as a
    # continuous color dimension and not apply color_discrete_map — confirm
    # the bars are actually drawn in their band hue.
    fig = px.bar_polar(counts_df, r='count', theta='band', color='band', color_discrete_map=color_map)
    fig.show()



    # counts = dict()
    # for i, row in subset_hsv.iterrows():
    #     r, g, b = (row['Band_R'], row['Band_G'], row['Band_B'])
    #     counts[(float(r), float(g), float(b))] = len(subset_hsv[(subset_hsv['Band_R'] == row['Band_R']) & (subset_hsv['Band_G'] == row['Band_G']) & (subset_hsv['Band_B'] == row['Band_B'])])

    # counter = dict(Counter(subset_hsv.loc[:, ['Band_R', 'Band_G', 'Band_B']].apply(lambda r: rgb2hex((r['Band_R'], r['Band_G'], r['Band_B'])), axis=1).tolist()))
    # pprint(counter)
    # x = list(counts.keys())
    # y = list(counts.values())
    # hp = sns.histplot(x=[str(k) for k in x], weights=y, hue=x, pal# ette=x, discrete=True)
    # plt.xticks(rotation=60)
    # plt.legend().remove()
    # plt.title(f"{treatment} color distribution")
    # plt.show()
    # plt.clf()


def rgb_analysis(subset, treatment=None, n_clusters=25, seed=0):
    """Cluster the colors of ``subset`` in RGB space and plot the clusters.

    The ``R``, ``G``, ``B`` columns are clustered with k-means. Each non-empty
    cluster is represented by its center (quantized to 8-bit RGB) and by the
    proportion of rows assigned to it. Three figures are shown: a color strip
    of the centers, a bar chart of the proportions, and a 3D scatter of the
    centers in RGB space with marker size proportional to cluster size.

    Args:
        subset: DataFrame with columns ``R``, ``G``, ``B`` convertible to float.
        treatment: label used in the plot titles. When omitted, it is taken
            from the distinct values of ``subset['Treatment']`` if that column
            exists; otherwise the titles carry no label.
        n_clusters: requested number of clusters; capped at the number of rows.
        seed: seed for choosing the initial centroids, so that repeated runs
            give the same clustering.
    """
    points = subset[['R', 'G', 'B']].astype(float).to_numpy()
    if len(points) == 0:
        # Nothing to cluster or plot.
        return

    if treatment is None and 'Treatment' in subset.columns:
        treatment = ", ".join(sorted(subset['Treatment'].astype(str).unique()))
    title_prefix = f"{treatment} " if treatment is not None else ""

    # Seed k-means with distinct rows picked by a local, seeded generator;
    # minit='matrix' makes kmeans2 use them as the initial centroids.
    k = min(n_clusters, len(points))
    picks = np.random.RandomState(seed).choice(len(points), size=k, replace=False)
    centers, labels = kmeans2(points, points[picks], minit='matrix')

    # labels[i] is the cluster index of row i, so the size of cluster j is the
    # number of labels equal to j (not the label at position j).
    cluster_sizes = Counter(labels.tolist())
    counts = {}
    for index, center in enumerate(centers):
        members = cluster_sizes.get(index, 0)
        if members == 0:
            # kmeans2 can leave a cluster empty; it has nothing to show.
            continue
        color = tuple(min(255, max(0, int(round(float(channel) * 255)))) for channel in center)
        # Distinct centers may quantize to the same 8-bit color: merge them.
        counts[color] = counts.get(color, 0) + members
    total = sum(counts.values())
    props = {color: (members / total) for color, members in counts.items()}

    plt.imshow([list(props.keys())])
    plt.show()

    x = [rgb2hex(color) for color in props.keys()]
    y = list(props.values())
    hp = sns.histplot(x=x, weights=y, hue=x, palette=x, discrete=True)
    plt.xticks(rotation=60)
    plt.legend().remove()
    plt.title(f"{title_prefix}color distribution")
    plt.show()
    plt.clf()

    fig = go.Figure()
    r = [color[0] for color in props.keys()]
    g = [color[1] for color in props.keys()]
    b = [color[2] for color in props.keys()]
    colors_map = [f'rgb({c[0]}, {c[1]}, {c[2]})' for c in props.keys()]
    sizes_map = [share * 1000 for share in props.values()]
    trace = dict(type='scatter3d', x=r, y=g, z=b, mode='markers', marker=dict(color=colors_map, size=sizes_map))
    fig.add_trace(trace)
    # Axis titles follow the data actually plotted on each axis (x=R, y=G, z=B).
    fig.update_layout(title=treatment, scene=dict(xaxis_title='R', yaxis_title='G', zaxis_title='B'))
    fig.show()

# Distinct treatment labels in ascending order.
treatments = sorted(set(df['Treatment']))
for treatment in treatments:
    # rows belonging to the current treatment
    subset = df.loc[df['Treatment'] == treatment]
    print(f"{treatment}:", len(subset))

    # rgb_analysis(subset)
    hsv_analysis(subset)

    # colors = subset.loc[:, ['R', 'G', 'B', 'Freq']].head(100)
    # colors['R'] = colors['R'].astype(float) * 256
    # colors['G'] = colors['G'].astype(float) * 256
    # colors['B'] = colors['B'].astype(float) * 256
    # colors['Freq'] = colors['Freq'].astype(int)
    # print(colors)

    # fig = go.Figure()
    # colors_map = [f'rgb({c[0]}, {c[1]}, {c[2]})' for c in list(colors.apply(
    #     lambda r: (float(r['R']), float(r['G']), float(r['B'])), axis=1))]
    # sizes_map = [(int(f) / 2000) for f in list(colors['Freq'])]
    # trace=dict(type='scatter3d',
    #            x=colors['R'],
    #            y=colors['G'],
    #            z=colors['B'],
    #            mode='markers',
    #            marker=dict(color=colors_map, size=sizes_map))
    # fig.add_trace(trace)
    # fig.update_layout(title=treatment, scene=dict(
    #     xaxis_title='G',
    #     yaxis_title='R',
    #     zaxis_title='B'))
    # fig.show()

CalMag: 147


Control: 161


MaxSea: 162


TODO: define a series of color slices (bins), compute proportions of each treatment falling into each bin

TODO: compute total pixel counts for each treatment as a proxy for pitcher size
(later try a real pitcher segmentation, but for now just make an overall count)