## Imports

In [112]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re
from bs4 import BeautifulSoup
import requests
import urllib.request
from urllib.parse import urlparse
import os
import nltk
import spacy
import locationtagger
from colorthief import ColorThief
from geopy import geocoders, Nominatim
import json
import colorsys

In [113]:
gn = Nominatim(user_agent="Your_Name")

## Web scraping
Retrieve the latest website data

In [114]:
# Retrieve the relevant website data, set an index, and put it into a dataframe
url1 = 'http://en.wikipedia.org/wiki/List_of_works_by_Vincent_van_Gogh'
tables = pd.read_html(url1)
phases = tables[1:7]

for i, ph in enumerate(phases):
    phases[i] = ph.drop(['#', 'Image', 'Medium,Dimensions', 'Catalogue No.'], axis=1)
vgph = pd.concat(phases)

vgph.index = range(len(vgph))

# Set workbook
vgph1 = vgph.copy()

## Pre-processing
Map the dates to seasons and extract the cities in which the works are currently located

In [115]:
# Establish patterns to search for in pre-processing
#    Month data
months_mapping = ['January', 'February', 'March',
         'April', 'May', 'June', 'July',
         'August', 'September', 'October',
         'November', 'December', 'Spring',
         'Summer', 'Autumn', 'Winter']
month_pattern = '|'.join(months_mapping)

#    Ownership data
owner_pattern = ['Private', 'Private Collection', 'Private Collections',
                    'Unknown', 'Stolen', 'Location']
ownership_pattern = '|'.join(owner_pattern)

In [116]:
# Helper functions to map 'Date' to seasons
def seasonal_search(search_str:str, search_list:str):
    search_obj = re.search(search_list, search_str)
    if search_obj:
        return_str = search_str[search_obj.start():search_obj.end()]
    else: 
        return_str = ''
    return return_str

def season_map(month):
    if month == 'March' or month == 'April' or month == 'May' or month == 'Spring':
        return 'Spring'
    if month == 'June' or month == 'July' or month == 'August' or month == 'Summer':
        return 'Summer'
    if month == 'September' or month == 'October' or month == 'November' or month == 'Autumn':
        return 'Autumn'
    if month == 'December' or month == 'January' or month == 'February' or month == 'Winter': 
        return 'Winter'
    
# Helper function to map 'Current location' to an ownership pattern
def ownership_search(search_str:str, search_list:str):
    search_obj = re.search(search_list, search_str)
    if search_obj:
        return_str = search_str[search_obj.start():search_obj.end()]
    else: 
        return_str = 'Museum'
    return return_str

# Helper function to retrieve the stripped citynames
def loc_map(loc):
    if loc == 'Villa':
        return 'Winterthur'
    if loc == 'Clark':
        return 'Williamstown'
    else:
        return loc

In [117]:
# Pre-processing
#     Correct for date/time information
vgph1['Year'] = vgph1['Date']
vgph1['Year'] = vgph1['Year'].str.replace(r'\D', '', regex=True)
vgph1['Year'] = vgph1['Year'].map(str).apply(lambda val: val[:4] if val[:2]=='18' else val[-4:])
vgph1['Month'] = vgph1['Date'].str.replace(r'\d+', '', regex=True).replace('\W+','', regex=True)
vgph1['Month'] = vgph1['Month'].apply(lambda x: seasonal_search(search_str=x, search_list=month_pattern))

#     Draw 'Season' from 'Month'
vgph1['Season'] = vgph1['Month'].apply(lambda x: season_map(x))

#    Draw ownership from 'Current Location' (run only once)
vgph1['Ownership'] = vgph1['Current location'].apply(lambda x: ownership_search(search_str=x, search_list=ownership_pattern))

#     Draw 'Place_Name' from 'Current Location' (run only once)
# df_temp = pd.DataFrame()
# df_temp2 = pd.DataFrame()
# df_temp['Extracted location'] = vgph1['Current location'].apply(lambda x: locationtagger.find_locations(text = x))
# df_temp['Tagged city'] = df_temp['Extracted location'].apply(lambda x: x.cities).astype(str)
# df_temp['Tagged city'] = df_temp['Tagged city'].str.replace('[','', regex=True).replace(']','', regex=True).replace('Van','', regex=True).replace("'", '', regex=True)
# df_temp2 = df_temp['Tagged city'].str.split(',', expand=True)
# vgph1['Current city'] = df_temp2[0].apply(lambda x: loc_map(x))

#    Retrieve latitude and longitude from city names (run only once)
# latitude, longitude = [], []
# for loc in vgph1['Current city']:
#     if (loc==''):
#         latitude.append('')
#         longitude.append('')
#     else:
#         location = gn.geocode(loc)
#         latitude.append(location.latitude)
#         longitude.append(location.longitude)

# vgph1['Current x'] = latitude
# vgph1['Current y'] = longitude

## Image scraping
Scrape images and infromation from the website and run the color classification

In [118]:
# Obtain the title and url of the images with BeautifulSoup 
img_titles = []
img_urls = []
    
r = requests.get('http://en.wikipedia.org/wiki/List_of_works_by_Vincent_van_Gogh') 
soup = BeautifulSoup(r.text, 'html.parser') 

for item in soup.find_all('img'):
    img_titles.append(item['alt'])
    img_urls.append('https:' + item['src'])
    
# Add the file name and url to the dataframe
# Omit the first img featured (Vincent van Gogh portrait)
vgph1['.jpg name'] = img_titles[1:len(vgph1)+1]
vgph1['.jpg url'] = img_urls[1:len(vgph1)+1]

In [119]:
# Download images (only run once)

# def imagedown(df, url, folder):
    
#     try:
#         os.mkdir(os.path.join(os.getcwd(), folder))
#     except:
#         pass
#     os.chdir(os.path.join(os.getcwd(), folder))
    
#     r = requests.get(url)
#     soup = BeautifulSoup(r.text, 'html.parser')
#     images = soup.find_all('img')
#     images_lic = images[1:len(df)+1]
    
#     for i, image in enumerate(images_lic):
#         name = image['alt']
#         link = 'https:' + image['src']
#         index = 'vg{:0>3}_'.format(i)
#         with open(index + name.replace('jpeg', '').replace('JPG', '').replace('jpg', '').replace('"', '').replace('?', '').replace(':', '-').replace('*', '').replace('<', '').replace('>', '').replace('|', '-').replace('/', '-').replace('(', '-').replace(')', '-').replace(' ', '-') + 'jpg', 'wb') as f:
#             im = requests.get(link, headers={'Host': 'upload.wikimedia.org'})
#             f.write(im.content)

# imagedown(vgph1, 'http://en.wikipedia.org/wiki/List_of_works_by_Vincent_van_Gogh', 'van_gogh_paintings')

In [120]:
# Helper function to retrieve and save dominant colors and color palette from the paintings
def colorscrape(df, directory):
    dominant_r, dominant_g, dominant_b, dominant_rgb, palette = [], [], [], [], []
    
    for filename in os.listdir(directory):
        f = os.path.join(directory, filename)
        if os.path.isfile(f):
            color_thief = ColorThief(f)
            dominant_color= color_thief.get_color(quality=3)
            dominant_r.append(dominant_color[0])
            dominant_g.append(dominant_color[1])
            dominant_b.append(dominant_color[2])
            dominant_rgb.append(dominant_color)
            palette.append(color_thief.get_palette(color_count = 5))
    
    df['R'] = dominant_r
    df['G'] = dominant_g
    df['B'] = dominant_b
    df['Dominant Color'] = dominant_rgb
    df['Color Palette'] = palette

In [121]:
# Retrieve colors and color palette (run only once)
#colorscrape(vgph1, r'C:\Users\s164386\VolVis\epds\life_in_color\van_gogh_paintings')

In [122]:
# Separate the time-consuming code for a faster website (last update: 14/01/2022)

#vg_place_color = vgph2[['Current city', 'Current x', 'Current y', '.jpg name', '.jpg url', 'Dominant Color', 'Color Palette', 'R', 'G', 'B', 'HLS', 'H', 'L', 'S']]
#vg_place_color.to_csv(r'C:\Users\s164386\VolVis\epds\life_in_color\datasets\vg_place_color.csv', index = False)

## Dataset configuration
Join live data with pre-classified data (city, color) <br>

In [129]:
# Load local data and check for changes
vg_place_color = pd.read_csv(r'C:\Users\s164386\VolVis\epds\life_in_color\datasets\vg_place_color.csv')
if len(vg_place_color)!=len(vgph1):
    print("Table might have been manipulated: re-run all scripts to update information")

# Configure dataset
vgph2 = pd.merge(vgph1, vg_place_color)

# Back-up csv
vgph2.to_csv(r'C:\Users\s164386\VolVis\epds\life_in_color\datasets\vg_data.csv', index = False)

Table might have been manipulated: re-run all scripts to update information


## Workcount statistics
Retrieve relevant statistics about the artist's life

In [100]:
# Extract from how many works the season can be determined
vgph_temp = vgph1.dropna(subset=['Season'])
print("From " + str(vgph_temp.shape[0]) + " out of " + str(vgph1.shape[0]) + " artworks, the season in which they were made is known.")
vgph_area = pd.DataFrame({'Count' : vgph_temp.groupby( ['Year', 'Season'], dropna=False).size()}).reset_index()
vgph_area.to_csv(r'C:\Users\s164386\VolVis\epds\life_in_color\life_in_color\data\area_chart_per_year.csv', index = False)

From 635 out of 868 artworks, the season in which they were made is known.


In [109]:
# Prepare .csv for d3 stacked barchart
vgph_temp2 = pd.DataFrame({'Count': vgph_temp.groupby( ['Created in', 'Season'], dropna=False).size()}).reset_index()
vgph_barchart = pd.DataFrame()
vgph_barchart['place'] = [place for place in vgph1['Created in'].unique()]

autumn, winter, spring, summer = np.zeros(len(vgph_barchart)), np.zeros(len(vgph_barchart)), np.zeros(len(vgph_barchart)), np.zeros(len(vgph_barchart))

for index, place in enumerate(vgph_barchart['place']):
    for i, row in vgph_temp2.iterrows():
        if row['Created in']==place:
            if row['Season']=="Autumn":
                    autumn[index] = row['Count']
            if row['Season']=="Winter":
                    winter[index] = row['Count']
            if row['Season']=="Spring":
                    spring[index] = row['Count']
            if row['Season']=="Summer":
                    summer[index] = row['Count']
vgph_barchart['autumn'] = autumn
vgph_barchart['winter'] = winter
vgph_barchart['spring'] = spring
vgph_barchart['summer'] = summer
vgph_barchart.to_csv(r'C:\Users\s164386\VolVis\epds\life_in_color\life_in_color\data\vgph_barchart.csv', index = False)

In [347]:
# Workcount statistics by 'Created in'
vgph3 = pd.DataFrame({'count' : vgph2.groupby( ['Created in', 'Season'], dropna=False).size()}).reset_index()
vgph4 = pd.DataFrame({'Count' : vgph2.groupby( ['Created in', 'Year', 'Season'], dropna=False).size()}).sort_values(['Year']).reset_index()

hierarch_order = []
for place in vgph4['Created in'].unique():
    hierarch_order.append(place)
    
workcount_location = dict()
for index, row in vgph3.iterrows():
    location = row["Created in"]
    if (location in workcount_location):
        works_location = workcount_location[location]
    else:
        works_location = {"Place": location}
        workcount_location[location] = works_location
    works_location[row["Season"]] = row["count"]

workcount_sorted = list()
for index, place in enumerate(hierarch_order):
    workcount_sorted.append(workcount_location[place])

with open(r'C:\Users\s164386\VolVis\epds\life_in_color\life_in_color\data\workcount_sorted.json', 'w') as f:
    json.dump(list(workcount_sorted), f)

In [356]:
# Workcount statistics by 'Year'
vgph5 = pd.DataFrame({'Count' : vgph2.groupby(['Year'], dropna=False).size()}).reset_index()

workcount = list()
for index, row in vgph5.iterrows():
    workcount.append({"Year": row[0], "Count": row[1]})

with open(r'C:\Users\s164386\VolVis\epds\life_in_color\life_in_color\data\workcount.json', 'w') as f:
    json.dump(workcount, f)

In [359]:
# Workcount statistics by 'Season'
vgph6 = pd.DataFrame({'Count' : vgph2.groupby(['Season'], dropna=False).size()}).reset_index()

workcount_season = list()
for index, row in vgph6.iterrows():
    workcount_season.append({"Season": row[0], "Count": row[1]})

with open(r'C:\Users\s164386\VolVis\epds\life_in_color\life_in_color\data\workcount_season.json', 'w') as f:
    json.dump(workcount_season, f)

In [111]:
vgph_barchart

Unnamed: 0,place,autumn,winter,spring,summer
0,The Hague,2.0,0.0,1.0,24.0
1,Scheveningen,0.0,0.0,0.0,1.0
2,Nieuw-Amsterdam,3.0,0.0,0.0,0.0
3,Drenthe,3.0,0.0,0.0,0.0
4,Nuenen,63.0,38.0,54.0,33.0
5,Amsterdam,1.0,0.0,0.0,0.0
6,Antwerp,0.0,6.0,0.0,0.0
7,Paris,0.0,0.0,0.0,0.0
8,Arles,48.0,33.0,52.0,53.0
9,Saint-Rémy,65.0,29.0,26.0,23.0


## Color gradients

In [148]:
# Create the array to draw the gradient - group colors by 'Created in'
# Read retrieved colors and initialize arrays
vgph_color = pd.read_csv(r'C:\Users\s164386\VolVis\epds\life_in_color\datasets\vg_place_color.csv')
colormap_thehague = []
colormap_amsterdam = []
colormap_drenthe = []
colormap_nuenen = []
colormap_antwerp = []
colormap_paris = []
colormap_arles = []
colormap_saintremy = []
colormap_auverssuroise = []

# Store the colors in grouped arrays
for index, row in vgph2.iterrows():
    if (row['Created in'] == "The Hague"):
        colormap_thehague.append("rgb"+row['Dominant Color'])
    if (row['Created in'] == "Scheveningen"):
        colormap_thehague.append("rgb"+row['Dominant Color'])
    if (row['Created in'] == "Amsterdam"):
        colormap_amsterdam.append("rgb"+row['Dominant Color'])
    if (row['Created in'] == "Nieuw-Amsterdam"):
        colormap_amsterdam.append("rgb"+row['Dominant Color'])
    if (row['Created in'] == "Drenthe"):
        colormap_drenthe.append("rgb"+row['Dominant Color'])
    if (row['Created in'] == "Nuenen"):
        colormap_nuenen.append("rgb"+row['Dominant Color'])
    if (row['Created in'] == "Antwerp"):
        colormap_antwerp.append("rgb"+row['Dominant Color'])
    if (row['Created in'] == "Paris"):
        colormap_paris.append("rgb"+row['Dominant Color'])
    if (row['Created in'] == "Arles"):
        colormap_arles.append("rgb"+row['Dominant Color'])
    if (row['Created in'] == "Saint-Rémy"):
        colormap_saintremy.append("rgb"+row['Dominant Color'])
    if (row['Created in'] == "Auvers-sur-Oise"):
        colormap_auverssuroise.append("rgb"+row['Dominant Color'])
        
# Write the .txt files to feature them in .css
with open(r'C:\Users\s164386\VolVis\epds\life_in_color\life_in_color\data\colormaps\cm_thehague.txt', "w") as txt_file:
    for line in colormap_thehague:
        txt_file.write("".join(line) + ",")
with open(r'C:\Users\s164386\VolVis\epds\life_in_color\life_in_color\data\colormaps\cm_amsterdam.txt', "w") as txt_file:
    for line in colormap_amsterdam:
        txt_file.write("".join(line) + ",")
with open(r'C:\Users\s164386\VolVis\epds\life_in_color\life_in_color\data\colormaps\cm_drenthe.txt', "w") as txt_file:
    for line in colormap_drenthe:
        txt_file.write("".join(line) + ",")
with open(r'C:\Users\s164386\VolVis\epds\life_in_color\life_in_color\data\colormaps\cm_nuenen.txt', "w") as txt_file:
    for line in colormap_nuenen:
        txt_file.write("".join(line) + ",")
with open(r'C:\Users\s164386\VolVis\epds\life_in_color\life_in_color\data\colormaps\cm_antwerp.txt', "w") as txt_file:
    for line in colormap_antwerp:
        txt_file.write("".join(line) + ",")
with open(r'C:\Users\s164386\VolVis\epds\life_in_color\life_in_color\data\colormaps\cm_paris.txt', "w") as txt_file:
    for line in colormap_paris:
        txt_file.write("".join(line) + ",")
with open(r'C:\Users\s164386\VolVis\epds\life_in_color\life_in_color\data\colormaps\cm_arles.txt', "w") as txt_file:
    for line in colormap_arles:
        txt_file.write("".join(line) + ",")
with open(r'C:\Users\s164386\VolVis\epds\life_in_color\life_in_color\data\colormaps\cm_saintremy.txt', "w") as txt_file:
    for line in colormap_saintremy:
        txt_file.write("".join(line) + ",")
with open(r'C:\Users\s164386\VolVis\epds\life_in_color\life_in_color\data\colormaps\cm_auverssuroise.txt', "w") as txt_file:
    for line in colormap_auverssuroise:
        txt_file.write("".join(line) + ",")

In [149]:
vgph2.groupby( ['Year', 'Created in'], dropna=False).size().reset_index()

Unnamed: 0,Year,Created in,0
0,1881,The Hague,1
1,1882,Scheveningen,1
2,1882,The Hague,13
3,1883,Drenthe,3
4,1883,Nieuw-Amsterdam,3
5,1883,The Hague,13
6,1884,Nuenen,59
7,1885,Amsterdam,1
8,1885,Antwerp,7
9,1885,Nuenen,135


## Connection coordinates

In [62]:
# Information about Van Gogh's living pattern, retrieved from Van Gogh Museum website
vg_places = ["Zundert", "Zevenbergen", "Tilburg",
            "The Hague", "London", "Paris", "Dordrecht",
            "Amsterdam", "Borinage", "Brussels", "Etten",
            "The Hague", "Drenthe", "Nuenen", "Antwerp",
            "Paris", "Arles", "Saint-Remy", "Auvers-sur-Oise"]

#    Time consuming: only run once
# vg_lat, vg_lon = [], []
# for place in vg_places:
#     location = gn.geocode(place)
#     vg_lat.append(location.latitude)
#     vg_lon.append(location.longitude)

# vg_locations = pd.DataFrame()
# vg_locations['place'] = vg_places
# vg_locations['lat'] = vg_lat
# vg_locations['lon'] = vg_lon
vg_locations.to_csv(r'C:\Users\s164386\VolVis\epds\life_in_color\life_in_color\data\vg_latlon_names.csv', index = False)

In [60]:
# Prepare .csv for connection map
vg_latlon = pd.DataFrame()
vg_lat1 = vg_lat[:len(vg_lat)-1]
vg_lat2 = vg_lat[1:]
vg_lon1 = vg_lon[:len(vg_lon)-1]
vg_lon2 = vg_lon[1:]
vg_latlon['long1'] = vg_lon1
vg_latlon['long2'] = vg_lon2
vg_latlon['lat1'] = vg_lat1
vg_latlon['lat2'] = vg_lat2
vg_latlon.to_csv(r'C:\Users\s164386\VolVis\epds\life_in_color\life_in_color\data\vg_latlon.csv', index = False)

## Color trials

In [149]:
# Convert RGB to HLS for more accurate color representation
vgph2['R'] = vgph2['R'].astype(float)
vgph2['G'] = vgph2['G'].astype(float)
vgph2['B'] = vgph2['B'].astype(float)
temp = vgph2.copy()

temp['to_hls'] = temp[['R', 'G', 'B']].apply(tuple, axis=1)

hls = []
for (r, g, b) in temp['to_hls']:
    hls_calc = (colorsys.rgb_to_hls(r, g, b))
    hls.append(hls_calc)

vgph2['HLS'] = hls
H, L, S = [], [], []

for i, hls in enumerate(vgph2['HLS']):
    H.append(hsl[i][0])
    L.append(hsl[i][1])
    S.append(hsl[i][2])
vgph2['H'] = H
vgph2['L'] = L
vgph2['S'] = S

In [219]:
# Convert RGB to HSV for more accurate color representation
vgph2['R'] = vgph2['R'].astype(float)
vgph2['G'] = vgph2['G'].astype(float)
vgph2['B'] = vgph2['B'].astype(float)
temp = vgph2.copy()

temp['to_hsv'] = temp[['R', 'G', 'B']].apply(tuple, axis=1)

hsv = []
for (r, g, b) in temp['to_hsv']:
    hsv_calc = (colorsys.rgb_to_hsv(r, g, b))
    hsv.append(hsv_calc)

vgph2['HSV'] = hsv

h_hsv, s_hsv, v_hsv = [], [], []

for i, hsv in enumerate(vgph2['HSV']):
    h_hsv.append(hsv[0])
    s_hsv.append(hsv[1])
    v_hsv.append(hsv[2])
vgph2['H_hsv'] = h_hsv
vgph2['S_hsv'] = s_hsv
vgph2['V_hsv'] = v_hsv

vgph2

Unnamed: 0,Title,Date,Current location,Created in,Year,Month,Season,Ownership,.jpg name,.jpg url,...,B,HSL,HLS,H,L,S,HSV,H_hsv,S_hsv,V_hsv
0,Still Life with Cabbage and Clogs,November-December 1881,"Van Gogh Museum, Amsterdam",The Hague,1881,November,Autumn,Museum,Stilleven met kool en klompen - s0137V1962 - V...,https://upload.wikimedia.org/wikipedia/commons...,...,28.0,"(0.10666666666666667, 40.5, -0.31645569620253167)","(0.10666666666666667, 40.5, -0.31645569620253167)",0.106667,40.5,-0.316456,"(0.10666666666666667, 0.4716981132075472, 53.0)",0.106667,0.471698,53.0
1,Beach at Scheveningen in Calm Weather,August 1882,"Minnesota Maritime Art Museum, Winona",The Hague,1882,August,Summer,Museum,Beach at Scheveningen in Calm Weather - My Dre...,https://upload.wikimedia.org/wikipedia/commons...,...,203.0,"(0.08823529411764706, 220.0, -0.0776255707762557)","(0.08823529411764706, 220.0, -0.0776255707762557)",0.088235,220.0,-0.077626,"(0.08823529411764706, 0.14345991561181434, 237.0)",0.088235,0.143460,237.0
2,Dunes,August 1882,Private collection,The Hague,1882,August,Summer,Private,Van Gogh - Landschaft mit Dünen.jpeg,https://upload.wikimedia.org/wikipedia/commons...,...,68.0,"(0.11746031746031745, 120.5, -0.4393305439330544)","(0.11746031746031745, 120.5, -0.4393305439330544)",0.117460,120.5,-0.439331,"(0.11746031746031745, 0.6069364161849711, 173.0)",0.117460,0.606936,173.0
3,Women Mending Nets in the Dunes,August 1882,Private collection,The Hague,1882,August,Summer,Private,Van Gogh - Landschaft mit Netzflickerinnen.jpeg,https://upload.wikimedia.org/wikipedia/commons...,...,43.0,"(0.12626262626262627, 59.5, -0.28205128205128205)","(0.12626262626262627, 59.5, -0.28205128205128205)",0.126263,59.5,-0.282051,"(0.12626262626262627, 0.4342105263157895, 76.0)",0.126263,0.434211,76.0
4,"A Girl in the Street, Two Coaches in the Backg...",August 1882,"Villa Flora, Winterthur",The Hague,1882,August,Summer,Museum,Van Gogh - Mädchen an einer Haltestelle.jpeg,https://upload.wikimedia.org/wikipedia/commons...,...,85.0,"(0.10964912280701755, 123.0, -0.3114754098360656)","(0.10964912280701755, 123.0, -0.3114754098360656)",0.109649,123.0,-0.311475,"(0.10964912280701755, 0.4720496894409938, 161.0)",0.109649,0.472050,161.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
865,The Fields,July 1890,Private collection,Auvers-sur-Oise,1890,July,Summer,Private,Vincent van Gogh - The Fields (1890).jpg,https://upload.wikimedia.org/wikipedia/commons...,...,137.0,"(0.19166666666666665, 147.0, -0.0684931506849315)","(0.19166666666666665, 147.0, -0.0684931506849315)",0.191667,147.0,-0.068493,"(0.19166666666666665, 0.12738853503184713, 157.0)",0.191667,0.127389,157.0
866,Haystacks under a Rainy Sky,July 1890,"Kröller-Müller Museum, Otterlo",Auvers-sur-Oise,1890,July,Summer,Museum,Van Gogh - Heuschober an einem Regentag.jpeg,https://upload.wikimedia.org/wikipedia/commons...,...,103.0,"(0.19444444444444442, 124.0, -0.17073170731707...","(0.19444444444444442, 124.0, -0.17073170731707...",0.194444,124.0,-0.170732,"(0.19444444444444442, 0.2896551724137931, 145.0)",0.194444,0.289655,145.0
867,View of Auvers with Church,July 1890,"Rhode Island School of Design Museum, Providence",Auvers-sur-Oise,1890,July,Summer,Museum,Van Gogh - Blick auf Auvers mit Kirche.jpeg,https://upload.wikimedia.org/wikipedia/commons...,...,146.0,"(0.08865248226950355, 169.5, -0.1394658753709199)","(0.08865248226950355, 169.5, -0.1394658753709199)",0.088652,169.5,-0.139466,"(0.08865248226950355, 0.24352331606217617, 193.0)",0.088652,0.243523,193.0
868,Wheat Fields with Auvers in the Background,July 1890,Private collection,Auvers-sur-Oise,1890,July,Summer,Private,Van Gogh - Weizenfelder mit Blick auf Auvers.jpeg,https://upload.wikimedia.org/wikipedia/commons...,...,91.0,"(0.09883720930232558, 134.0, -0.3233082706766917)","(0.09883720930232558, 134.0, -0.3233082706766917)",0.098837,134.0,-0.323308,"(0.09883720930232558, 0.4858757062146893, 177.0)",0.098837,0.485876,177.0


In [221]:
# Calculating colors codes by season
autumn, winter, spring, summer, nan = [], [], [], [], []
for i, row in vgph2.iterrows():
    if row['Season'] == "Autumn":
        autumn.append(row['HLS'])
    if row['Season'] == "Winter":
        winter.append(row['HLS'])
    if row['Season'] == "Spring":
        spring.append(row['HLS'])
    if row['Season'] == "Summer":
        summer.append(row['HLS'])
    if row['Season'] == None:
        nan.append(row['HLS'])
        
autumn_rgb, winter_rgb, spring_rgb, summer_rgb, nan_rgb = [], [], [], [], []
for i, row in vgph2.iterrows():
    if row['Season'] == "Autumn":
        autumn_rgb.append((row['R'], row['G'], row['B']))
    if row['Season'] == "Winter":
        winter_rgb.append((row['R'], row['G'], row['B']))
    if row['Season'] == "Spring":
        spring_rgb.append((row['R'], row['G'], row['B']))
    if row['Season'] == "Summer":
        summer_rgb.append((row['R'], row['G'], row['B']))
    if row['Season'] == None:
        nan_rgb.append(row['Dominant Color'])
        
autumn_hsv, winter_hsv, spring_hsv, summer_hsv, nan_hsv = [], [], [], [], []
for i, row in vgph2.iterrows():
    if row['Season'] == "Autumn":
        autumn_hsv.append(row['HSV'])
    if row['Season'] == "Winter":
        winter_hsv.append(row['HSV'])
    if row['Season'] == "Spring":
        spring_hsv.append(row['HSV'])
    if row['Season'] == "Summer":
        summer_hsv.append(row['HSV'])
    if row['Season'] == None:
        nan_hsv.append(row['HSV'])


def mean_color(array):
    x, y, z = [], [], []
    for i, xyz in enumerate(array):
        x.append(array[i][0])
        y.append(array[i][1])
        z.append(array[i][2])
    mean_color = (np.mean(x), np.mean(y), np.mean(z))
    return mean_color
    
mean_autumn_hls = mean_color(autumn)
mean_winter_hls = mean_color(winter)
mean_spring_hls = mean_color(spring)
mean_summer_hls = mean_color(summer)
print(mean_autumn_hls)
print(mean_winter_hls)
print(mean_spring_hls)
print(mean_summer_hls)

mean_autumn_rgb = mean_color(autumn_rgb)
mean_winter_rgb = mean_color(winter_rgb)
mean_spring_rgb = mean_color(spring_rgb)
mean_summer_rgb = mean_color(summer_rgb)
print(mean_autumn_rgb)
print(mean_winter_rgb)
print(mean_spring_rgb)
print(mean_summer_rgb)

mean_autumn_hsv = mean_color(autumn_hsv)
mean_winter_hsv = mean_color(winter_hsv)
mean_spring_hsv = mean_color(spring_hsv)
mean_summer_hsv = mean_color(summer_hsv)
print(mean_autumn_hsv)
print(mean_winter_hsv)
print(mean_spring_hsv)
print(mean_summer_hsv)

(0.20142457357480448, 95.15405405405406, -0.20135634096970206)
(0.2194867548137127, 98.83796296296296, -0.1717135015594034)
(0.22245223791679328, 102.02721088435374, -0.1766319772644328)
(0.22562183777352457, 110.80456852791878, -0.2046211374990921)
(109.35135135135135, 104.63243243243244, 78.91351351351351)
(111.99074074074075, 107.77777777777777, 83.37962962962963)
(111.50340136054422, 112.36054421768708, 88.05442176870748)
(125.34517766497461, 122.23857868020305, 92.79187817258884)
(0.20142457357480448, 0.31516805247461877, 113.43243243243244)
(0.2194867548137127, 0.27152137393858805, 115.98148148148148)
(0.22245223791679328, 0.2826073614078684, 118.10884353741497)
(0.22562183777352457, 0.3182595088794251, 131.9035532994924)
