In [13]:
# ! pip3 install --user pandas
# ! pip3 install --user numpy
# ! pip3 install --user seaborn
# ! pip3 install --user matplotlib==3.1.3
# ! pip3 install --user plotly

Load color analysis results into a single dataset.

In [14]:
import os
from os.path import join
from glob import glob
import csv

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
from matplotlib import pyplot as plt

images = glob('../data_sm/*.JPG')
print("Input images:", len(images))

results = glob('../output_sm/*.csv')
print("Result files:", len(results))

headers = []
rows = []
for result in results:
    with open(result, 'r') as file:
        reader = csv.reader(file)
        if len(headers) == 0: headers = next(reader, None)
        else: next(reader, None)
        for row in reader: rows.append(row)

df = pd.DataFrame(rows, columns=headers)
print(df)

Input images: 6
Result files: 6
                         Image Plant      Hex                    R  \
0    1_14_19.10_30_20.5V4B3127     0  #b27d56   0.6980392156862745   
1    1_14_19.10_30_20.5V4B3127     0  #805e3a   0.5019607843137255   
2    1_14_19.10_30_20.5V4B3127     0  #cc9f6e                  0.8   
3    1_14_19.10_30_20.5V4B3127     0  #8a4631   0.5411764705882353   
4    1_14_19.10_30_20.5V4B3127     0  #6d4629  0.42745098039215684   
..                         ...   ...      ...                  ...   
318  1_14_19.10_30_20.5V4B3123     5  #635d44  0.38823529411764707   
319  1_14_19.10_30_20.5V4B3123     5  #8a832c   0.5411764705882353   
320  1_14_19.10_30_20.5V4B3123     5  #794d26   0.4745098039215686   
321  1_14_19.10_30_20.5V4B3123     5  #ddcf6d   0.8666666666666667   
322  1_14_19.10_30_20.5V4B3123     5  #4d4a31  0.30196078431372547   

                       G                    B    Freq  
0    0.49019607843137253  0.33725490196078434   68679  
1     0.3686274

Extract treatment from image name.

In [15]:
df['Treatment'] = df.apply(lambda row: 'Control' if 'Control' in row['Image'] else ('MaxSea' if 'MaxSea' in row['Image'] else ('CalMag' if 'CalMag' in row['Image'] else np.NaN)), axis=1)
print(df)

                         Image Plant      Hex                    R  \
0    1_14_19.10_30_20.5V4B3127     0  #b27d56   0.6980392156862745   
1    1_14_19.10_30_20.5V4B3127     0  #805e3a   0.5019607843137255   
2    1_14_19.10_30_20.5V4B3127     0  #cc9f6e                  0.8   
3    1_14_19.10_30_20.5V4B3127     0  #8a4631   0.5411764705882353   
4    1_14_19.10_30_20.5V4B3127     0  #6d4629  0.42745098039215684   
..                         ...   ...      ...                  ...   
318  1_14_19.10_30_20.5V4B3123     5  #635d44  0.38823529411764707   
319  1_14_19.10_30_20.5V4B3123     5  #8a832c   0.5411764705882353   
320  1_14_19.10_30_20.5V4B3123     5  #794d26   0.4745098039215686   
321  1_14_19.10_30_20.5V4B3123     5  #ddcf6d   0.8666666666666667   
322  1_14_19.10_30_20.5V4B3123     5  #4d4a31  0.30196078431372547   

                       G                    B    Freq Treatment  
0    0.49019607843137253  0.33725490196078434   68679       NaN  
1     0.3686274509803922  0

Drop rows with unknown treatment (TODO: ask Mason about unlabeled images).

In [16]:
df.dropna(how='any', inplace=True)
print(df)

                       Image Plant      Hex                    R  \
54   1_14_19.CalMag.5V4B3120     0  #a68f6a   0.6509803921568628   
55   1_14_19.CalMag.5V4B3120     0  #674827    0.403921568627451   
56   1_14_19.CalMag.5V4B3120     0  #605940   0.3764705882352941   
57   1_14_19.CalMag.5V4B3120     0  #816849   0.5058823529411764   
58   1_14_19.CalMag.5V4B3120     0  #8d6330   0.5529411764705883   
..                       ...   ...      ...                  ...   
210  1_14_19.CalMag.5V4B3117     5  #8c7963   0.5490196078431373   
211  1_14_19.CalMag.5V4B3117     5  #3e2c1d  0.24313725490196078   
212  1_14_19.CalMag.5V4B3117     5  #5a4736  0.35294117647058826   
213  1_14_19.CalMag.5V4B3117     5  #72604e   0.4470588235294118   
214  1_14_19.CalMag.5V4B3117     5  #675543    0.403921568627451   

                       G                    B   Freq Treatment  
54    0.5607843137254902  0.41568627450980394  19575    CalMag  
55    0.2823529411764706  0.15294117647058825  32706 

Add columns for HSV color representation.

In [17]:
from colorsys import rgb_to_hsv

def to_hsv(row):
    hsv = rgb_to_hsv(float(row['R']), float(row['G']), float(row['B']))
    return [hsv[0], hsv[1], hsv[2]]

df['H'], df['S'], df['V'] = zip(*df.apply(lambda row: to_hsv(row), axis=1))
print(df)

                       Image Plant      Hex                    R  \
54   1_14_19.CalMag.5V4B3120     0  #a68f6a   0.6509803921568628   
55   1_14_19.CalMag.5V4B3120     0  #674827    0.403921568627451   
56   1_14_19.CalMag.5V4B3120     0  #605940   0.3764705882352941   
57   1_14_19.CalMag.5V4B3120     0  #816849   0.5058823529411764   
58   1_14_19.CalMag.5V4B3120     0  #8d6330   0.5529411764705883   
..                       ...   ...      ...                  ...   
210  1_14_19.CalMag.5V4B3117     5  #8c7963   0.5490196078431373   
211  1_14_19.CalMag.5V4B3117     5  #3e2c1d  0.24313725490196078   
212  1_14_19.CalMag.5V4B3117     5  #5a4736  0.35294117647058826   
213  1_14_19.CalMag.5V4B3117     5  #72604e   0.4470588235294118   
214  1_14_19.CalMag.5V4B3117     5  #675543    0.403921568627451   

                       G                    B   Freq Treatment         H  \
54    0.5607843137254902  0.41568627450980394  19575    CalMag  0.102778   
55    0.2823529411764706  0.152

Compute aggregate stats for each treatment separately.

In [24]:
from collections import Counter
import plotly.express as px
import plotly.graph_objects as go
# from mpl_toolkits.mplot3d import Axes3D
# %matplotlib inline

treatments = list(np.unique(df['Treatment']))
for treatment in treatments:
    subset = df[df['Treatment'] == treatment]
    print(treatment + ":", len(subset))
    
    colors = subset.loc[:, ['R', 'G', 'B', 'Freq']].head(100)
    colors['R'] = colors['R'].astype(float) * 256
    colors['G'] = colors['G'].astype(float) * 256
    colors['B'] = colors['B'].astype(float) * 256
    colors['Freq'] = colors['Freq'].astype(int)
    print(colors)

    fig = go.Figure()
    colors_map = [f'rgb({c[0]}, {c[1]}, {c[2]})' for c in list(colors.apply(
        lambda r: (float(r['R']), float(r['G']), float(r['B'])), axis=1))]
    sizes_map = [(int(f) / 2000) for f in list(colors['Freq'])]
    trace=dict(type='scatter3d',
               x=colors['R'],
               y=colors['G'],
               z=colors['B'],
               mode='markers',
               marker=dict(color=colors_map, size=sizes_map))
    fig.add_trace(trace)
    fig.update_layout(title=treatment, scene=dict(
        xaxis_title='G',
        yaxis_title='R',
        zaxis_title='B'))
    fig.show()

CalMag: 161
              R           G           B   Freq
54   166.650980  143.560784  106.415686  19575
55   103.403922   72.282353   39.152941  32706
56    96.376471   89.349020   64.250980  28566
57   129.505882  104.407843   73.286275  46575
58   141.552941   99.388235   48.188235   7932
..          ...         ...         ...    ...
149  125.490196   67.262745   59.231373  45741
150   86.337255   83.325490   35.137255  17151
151  100.392157  112.439216   35.137255  33696
152  131.513725  105.411765   47.184314  30633
153  156.611765   75.294118   43.168627   6381

[100 rows x 4 columns]
