# Data Cleaning

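> In this notebook we load the scraped WikiArt data, drop duplicate artworks and styles with too few examples, convert the `Date` column to numeric years, and save the cleaned dataframe for use in later notebooks.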

---

## Imports

In [100]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from colorthief import ColorThief


from PIL import Image
import requests
from io import BytesIO

---

## Reading in Data

In [114]:
# Read the scraped WikiArt table from the project's data folder.
df = pd.read_csv('../data/wikiart_scraped.csv')
# A bare name as the last line of the cell displays the dataframe.
df

Unnamed: 0,Style,Artwork,Artist,Date,Link
0,Early-Dynastic,Narmer Palette,Ancient Egypt,3050 BC,https://uploads3.wikiart.org/00265/images/anci...
1,Early-Dynastic,Box Inlay with a Geometric Pattern,Ancient Egypt,3100-2900 BC,https://uploads2.wikiart.org/00244/images/anci...
2,Old-Kingdom,Khafre Enthroned,Ancient Egypt,2570 BC,https://uploads2.wikiart.org/00305/images/anci...
3,Middle-Kingdom,Stele of the Serpent King (Stela of Djet),Ancient Egypt,3000 BC,https://uploads7.wikiart.org/00305/images/anci...
4,Middle-Kingdom,"Laden Donkeys and Ploughing, Tomb of Djar",Ancient Egypt,2060-2010 BC,https://uploads8.wikiart.org/00244/images/anci...
...,...,...,...,...,...
124165,Street-Photography,Portrait of the corn stalk,Alfred Freddy Krupa,2019,https://uploads5.wikiart.org/00241/images/alfr...
124166,Street-Photography,The other side of life,Alfred Freddy Krupa,2019,https://uploads7.wikiart.org/00241/images/alfr...
124167,Street-Photography,The bonfire during construction,Alfred Freddy Krupa,2019,https://uploads7.wikiart.org/00242/images/alfr...
124168,Street-Photography,Limpidity,Alfred Freddy Krupa,2019,https://uploads7.wikiart.org/00248/images/alfr...


## Cleaning Data Functions

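> `to_date` converts the `Date` column to integer years: Roman-numeral centuries and hyphenated ranges are reduced to a single year. `cleaned_data` takes the raw dataframe, drops duplicate artworks, keeps only the styles with more than 500 works, applies `to_date`, and returns the cleaned dataframe with a fresh index.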

In [118]:
# Function that takes in Dataframe, returns numeric values for dates
def to_date(dataframe):
    """
    Convert the 'Date' column of a dataframe to integer years.

    Steps:
        1. Roman-numeral century labels (e.g. 'XVI cent.') become a year:
           century N maps to N * 100, and a range of two centuries
           (e.g. 'XIX-XX cent.') uses the first century.
        2. The truncated values '47' and '48' are expanded to 1447 and 1448.
        3. Hyphenated ranges such as '1401-1402' keep only the part before
           the first hyphen.
        4. The column is cast to int.

    args:
        dataframe: dataframe with a 'Date' column

    returns:
        a copy of the dataframe whose 'Date' column holds integers; the
        dataframe that was passed in is not modified

    raises:
        ValueError: if a date is still not a plain integer after the steps
            above (for example '3050 BC')
    """
    # Exact-match replacements for the non-numeric labels in the data.
    date_fixes = {
        'VII-VIII cent.': '700',
        'VIII cent.': '800',
        'X cent.': '1000',
        'XI cent.': '1100',
        'XI-XII cent.': '1100',
        'XII-XIII cent.': '1200',
        'XIII-XIV cent.': '1300',
        'XIV cent.': '1400',
        'XIV-XV cent.': '1400',
        'XV cent.': '1500',
        'XV-XVI cent.': '1500',
        'XVI cent.': '1600',
        'XVI-XVII cent.': '1600',
        'XVII cent.': '1700',
        'XVII-XVIII cent.': '1700',
        'XVIII cent.': '1800',
        'XVIII-XIX cent.': '1800',
        'XIX cent.': '1900',
        'XIX-XX cent.': '1900',
        'XX cent.': '2000',
        'XX-XXI cent.': '2000',
        'XXI cent.': '2100',
        # Two-digit dates that are missing their century.
        '47': '1447',
        '48': '1448',
    }

    # Work on a copy so the caller's frame (often a filtered slice) is untouched.
    result = dataframe.copy()

    # astype(str) lets the function be re-run on a column that is already numeric.
    dates = result['Date'].astype(str).replace(date_fixes)

    # Hyphen remover: keep the text before the first '-' of a range.
    dates = dates.str.split('-', n=1).str[0]

    # This converts all dates to numeric values
    result['Date'] = dates.astype(int)

    return result

In [119]:
def _print_summary(title, dataframe):
    """Print the columns, shape, unique counts, null counts and dtypes of a dataframe under a title."""
    print(title)
    print('='*20)
    print('Columns')
    print(dataframe.columns)
    print('_'*20)
    print('Dataframe Size')
    print(dataframe.shape)
    print('_'*20)
    print('Dataframe Unique Values')
    print(dataframe.nunique())
    print('_'*20)
    print('Null Values in Each Column')
    print(dataframe.isna().sum())
    print('_'*20)
    print('Data Types in Each Column')
    print(dataframe.dtypes)
    print('='*20)


def cleaned_data(dataframe, min_style_count=500):
    """
    cleaned_data removes duplicate artworks and rare styles, converts the
    dates to integers, and prints a summary of the dataframe before and
    after cleaning.

    args:
        dataframe: the dataframe that the user wants to clean; it needs the
            columns 'Style', 'Artwork', 'Artist' and 'Date'
        min_style_count: a style is kept only if it has more than this many
            rows after duplicates are dropped (default 500)

    returns:
        the cleaned dataframe with its index reset to 0..n-1
    """
    _print_summary('Before Cleaning', dataframe)

    # This drops duplicates of any work of art to decrease chances of sketches
    dataframe = dataframe.drop_duplicates(subset=['Artwork', 'Artist', 'Date'],
                                          keep='last')

    # This drops any values in the style column that do not have more than
    # min_style_count rows, so the model can properly train on the style
    style_counts = dataframe['Style'].value_counts()
    frequent_styles = style_counts[style_counts > min_style_count].index
    # .copy() so that later column assignments do not act on a view of the input
    dataframe = dataframe[dataframe['Style'].isin(frequent_styles)].copy()

    dataframe = to_date(dataframe)

    _print_summary('After Cleaning', dataframe)

    # returning the dataframe with an index reset
    return dataframe.reset_index(drop=True)

In [120]:
# Clean the raw frame; cleaned_data also prints a before/after summary.
clean_df = cleaned_data(df)
# Display the cleaned dataframe.
clean_df

Before Cleaning
Columns
Index(['Style', 'Artwork', 'Artist', 'Date', 'Link'], dtype='object')
____________________
Dataframe Size
(124170, 5)
____________________
Dataframe Unique Values
Style         217
Artwork     92050
Artist       3052
Date         2959
Link       116667
dtype: int64
____________________
Null Values in Each Column
Style      0
Artwork    0
Artist     0
Date       0
Link       0
dtype: int64
____________________
Data Types in Each Column
Style      object
Artwork    object
Artist     object
Date       object
Link       object
dtype: object
After Cleaning
Columns
Index(['Style', 'Artwork', 'Artist', 'Date', 'Link'], dtype='object')
____________________
Dataframe Size
(89682, 5)
____________________
Dataframe Unique Values
Style         51
Artwork    76166
Artist      2632
Date         620
Link       89634
dtype: int64
____________________
Null Values in Each Column
Style      0
Artwork    0
Artist     0
Date       0
Link       0
dtype: int64
____________________
Dat

Unnamed: 0,Style,Artwork,Artist,Date,Link
0,Early-Renaissance,Virgen Con El Niño,Álvaro Pires de Évora,1447,https://uploads6.wikiart.org/00312/images/alva...
1,Early-Renaissance,A Virgem em Majestade,Álvaro Pires de Évora,1448,https://uploads3.wikiart.org/00312/images/alva...
2,Early-Renaissance,"The Sacrifice of Isaac, bronze competition rel...",Filippo Brunelleschi,1401,https://uploads8.wikiart.org/images/filippo-br...
3,Early-Renaissance,Madonna with Child,Filippo Brunelleschi,1402,https://uploads2.wikiart.org/images/filippo-br...
4,Early-Renaissance,The sacrifice of Isaac,Lorenzo Ghiberti,1401,https://uploads0.wikiart.org/00164/images/ghib...
...,...,...,...,...,...
89677,Ukiyo-e,Lucy's Mutation,Takato Yamamoto,2014,https://uploads3.wikiart.org/00147/images/taka...
89678,Ukiyo-e,Common Sense,Takato Yamamoto,2014,https://uploads3.wikiart.org/00147/images/taka...
89679,Ukiyo-e,Sacred Circulation,Takato Yamamoto,2015,https://uploads3.wikiart.org/00147/images/taka...
89680,Ukiyo-e,Ecstasy of Linked Circles,Takato Yamamoto,2015,https://uploads3.wikiart.org/00147/images/taka...


In [121]:
def create_list_files(dir):
    """
    Return the path of every file found under `dir`, including files in
    nested sub-directories. Paths are built by joining each walked folder
    with the file name; directories themselves are not listed.
    """
    return [
        os.path.join(folder, filename)
        for folder, _subfolders, filenames in os.walk(dir)
        for filename in filenames
    ]

# Collect the path of every file stored under the styles image folder.
list_files = create_list_files('../images/styles/')
# Spot-check one entry. NOTE(review): the index is hard-coded and raises
# IndexError if the folder holds fewer files than that.
list_files[89685]

'../images/styles/Transavantgarde/image_85910.jpg'

## Creating Art Image Folders

In [65]:
# root_path = '../images/styles'
# list_styles = [i for i in clean_df['Style'].unique()]
# for items in list_styles:
#     path = os.path.join(root_path, items)
#     os.mkdir(path)

## Putting Images in Folders

In [99]:
# for i, row in clean_df.iterrows():
#     if i < 2999:
#         continue
#     response = requests.get(clean_df['Link'][i])
#     img = Image.open(BytesIO(response.content))
#     print(clean_df['Style'][i])
#     print(i)
#     print(img.mode)
#     img = img.convert("RGB") 
#     img = img.resize((250,250))
#     filepath = f'../images/styles/{clean_df["Style"][i]}/image_{i}.jpg'
#     img.save(filepath)

High-Renaissance
2999
RGB
High-Renaissance
3000
RGB
High-Renaissance
3001
RGB
High-Renaissance
3002
RGB
High-Renaissance
3003
RGB
High-Renaissance
3004
RGB
High-Renaissance
3005
RGB
High-Renaissance
3006
RGB
High-Renaissance
3007
RGB
High-Renaissance
3008
RGB
High-Renaissance
3009
RGB
High-Renaissance
3010
RGB
High-Renaissance
3011
RGB
High-Renaissance
3012
RGB
High-Renaissance
3013
RGB
High-Renaissance
3014
RGB
High-Renaissance
3015
RGB
High-Renaissance
3016
RGB
High-Renaissance
3017
RGB
High-Renaissance
3018
RGB
High-Renaissance
3019
RGB
High-Renaissance
3020
RGB
High-Renaissance
3021
RGB
High-Renaissance
3022
RGB
High-Renaissance
3023
RGB
High-Renaissance
3024
RGB
High-Renaissance
3025
RGB
High-Renaissance
3026
RGB
High-Renaissance
3027
RGB
High-Renaissance
3028
RGB
High-Renaissance
3029
RGB
High-Renaissance
3030
RGB
High-Renaissance
3031
RGB
High-Renaissance
3032
RGB
High-Renaissance
3033
RGB
High-Renaissance
3034
RGB
High-Renaissance
3035
RGB
High-Renaissance
3036
RGB
High-Renaiss

Mannerism-(Late-Renaissance)
3295
RGB
Mannerism-(Late-Renaissance)
3296
RGB
Mannerism-(Late-Renaissance)
3297
RGB
Mannerism-(Late-Renaissance)
3298
RGB
Mannerism-(Late-Renaissance)
3299
RGB
Mannerism-(Late-Renaissance)
3300
RGB
Mannerism-(Late-Renaissance)
3301
RGB
Mannerism-(Late-Renaissance)
3302
RGB
Mannerism-(Late-Renaissance)
3303
RGB
Mannerism-(Late-Renaissance)
3304
RGB
Mannerism-(Late-Renaissance)
3305
RGB
Mannerism-(Late-Renaissance)
3306
RGB
Mannerism-(Late-Renaissance)
3307
RGB
Mannerism-(Late-Renaissance)
3308
RGB
Mannerism-(Late-Renaissance)
3309
RGB
Mannerism-(Late-Renaissance)
3310
RGB
Mannerism-(Late-Renaissance)
3311
RGB
Mannerism-(Late-Renaissance)
3312
RGB
Mannerism-(Late-Renaissance)
3313
RGB
Mannerism-(Late-Renaissance)
3314
RGB
Mannerism-(Late-Renaissance)
3315
RGB
Mannerism-(Late-Renaissance)
3316
RGB
Mannerism-(Late-Renaissance)
3317
RGB
Mannerism-(Late-Renaissance)
3318
RGB
Mannerism-(Late-Renaissance)
3319
RGB
Mannerism-(Late-Renaissance)
3320
RGB
Mannerism-(L

Mannerism-(Late-Renaissance)
3512
RGB
Mannerism-(Late-Renaissance)
3513
RGB
Mannerism-(Late-Renaissance)
3514
RGB
Mannerism-(Late-Renaissance)
3515
RGB
Mannerism-(Late-Renaissance)
3516
RGB
Mannerism-(Late-Renaissance)
3517
RGB
Mannerism-(Late-Renaissance)
3518
RGB
Mannerism-(Late-Renaissance)
3519
RGB
Mannerism-(Late-Renaissance)
3520
RGB
Mannerism-(Late-Renaissance)
3521
RGB
Mannerism-(Late-Renaissance)
3522
RGB
Mannerism-(Late-Renaissance)
3523
RGB
Mannerism-(Late-Renaissance)
3524
RGB
Mannerism-(Late-Renaissance)
3525
RGB
Mannerism-(Late-Renaissance)
3526
RGB
Mannerism-(Late-Renaissance)
3527
RGB
Mannerism-(Late-Renaissance)
3528
RGB
Mannerism-(Late-Renaissance)
3529
RGB
Mannerism-(Late-Renaissance)
3530
RGB
Mannerism-(Late-Renaissance)
3531
RGB
Mannerism-(Late-Renaissance)
3532
RGB
Mannerism-(Late-Renaissance)
3533
RGB
Mannerism-(Late-Renaissance)
3534
RGB
Mannerism-(Late-Renaissance)
3535
RGB
Mannerism-(Late-Renaissance)
3536
RGB
Mannerism-(Late-Renaissance)
3537
RGB
Mannerism-(L

Mannerism-(Late-Renaissance)
3728
RGB
Mannerism-(Late-Renaissance)
3729
RGB
Mannerism-(Late-Renaissance)
3730
RGB
Mannerism-(Late-Renaissance)
3731
RGB
Mannerism-(Late-Renaissance)
3732
RGB
Mannerism-(Late-Renaissance)
3733
RGB
Mannerism-(Late-Renaissance)
3734
RGB
Mannerism-(Late-Renaissance)
3735
RGB
Mannerism-(Late-Renaissance)
3736
RGB
Mannerism-(Late-Renaissance)
3737
RGB
Mannerism-(Late-Renaissance)
3738
RGB
Mannerism-(Late-Renaissance)
3739
RGB
Mannerism-(Late-Renaissance)
3740
RGB
Mannerism-(Late-Renaissance)
3741
RGB
Mannerism-(Late-Renaissance)
3742
RGB
Mannerism-(Late-Renaissance)
3743
RGB
Mannerism-(Late-Renaissance)
3744
RGB
Mannerism-(Late-Renaissance)
3745
RGB
Mannerism-(Late-Renaissance)
3746
RGB
Mannerism-(Late-Renaissance)
3747
RGB
Mannerism-(Late-Renaissance)
3748
RGB
Mannerism-(Late-Renaissance)
3749
RGB
Mannerism-(Late-Renaissance)
3750
RGB
Mannerism-(Late-Renaissance)
3751
RGB
Mannerism-(Late-Renaissance)
3752
RGB
Mannerism-(Late-Renaissance)
3753
RGB
Mannerism-(L

Mannerism-(Late-Renaissance)
3944
RGB
Mannerism-(Late-Renaissance)
3945
RGB
Mannerism-(Late-Renaissance)
3946
RGB
Mannerism-(Late-Renaissance)
3947
RGB
Mannerism-(Late-Renaissance)
3948
RGB
Mannerism-(Late-Renaissance)
3949
RGB
Mannerism-(Late-Renaissance)
3950
RGB
Mannerism-(Late-Renaissance)
3951
RGB
Mannerism-(Late-Renaissance)
3952
RGB
Mannerism-(Late-Renaissance)
3953
RGB
Mannerism-(Late-Renaissance)
3954
RGB
Mannerism-(Late-Renaissance)
3955
RGB
Mannerism-(Late-Renaissance)
3956
RGB
Mannerism-(Late-Renaissance)
3957
RGB
Mannerism-(Late-Renaissance)
3958
RGB
Mannerism-(Late-Renaissance)
3959
RGB
Mannerism-(Late-Renaissance)
3960
RGB
Mannerism-(Late-Renaissance)
3961
RGB
Mannerism-(Late-Renaissance)
3962
RGB
Mannerism-(Late-Renaissance)
3963
RGB
Mannerism-(Late-Renaissance)
3964
RGB
Mannerism-(Late-Renaissance)
3965
RGB
Mannerism-(Late-Renaissance)
3966
RGB
Mannerism-(Late-Renaissance)
3967
RGB
Mannerism-(Late-Renaissance)
3968
RGB
Mannerism-(Late-Renaissance)
3969
RGB
Mannerism-(L

Mannerism-(Late-Renaissance)
4160
RGB
Mannerism-(Late-Renaissance)
4161
RGB
Mannerism-(Late-Renaissance)
4162
RGB
Mannerism-(Late-Renaissance)
4163
RGB
Mannerism-(Late-Renaissance)
4164
RGB
Mannerism-(Late-Renaissance)
4165
RGB
Mannerism-(Late-Renaissance)
4166
RGB
Mannerism-(Late-Renaissance)
4167
RGB
Mannerism-(Late-Renaissance)
4168
RGB
Mannerism-(Late-Renaissance)
4169
RGB
Mannerism-(Late-Renaissance)
4170
RGB
Mannerism-(Late-Renaissance)
4171
RGB
Mannerism-(Late-Renaissance)
4172
RGB
Mannerism-(Late-Renaissance)
4173
RGB
Mannerism-(Late-Renaissance)
4174
RGB
Mannerism-(Late-Renaissance)
4175
RGB
Mannerism-(Late-Renaissance)
4176
RGB
Mannerism-(Late-Renaissance)
4177
RGB
Mannerism-(Late-Renaissance)
4178
RGB
Mannerism-(Late-Renaissance)
4179
RGB
Mannerism-(Late-Renaissance)
4180
RGB
Mannerism-(Late-Renaissance)
4181
RGB
Mannerism-(Late-Renaissance)
4182
RGB
Mannerism-(Late-Renaissance)
4183
RGB
Mannerism-(Late-Renaissance)
4184
RGB
Mannerism-(Late-Renaissance)
4185
RGB
Mannerism-(L

Mannerism-(Late-Renaissance)
4376
RGB
Mannerism-(Late-Renaissance)
4377
RGB
Mannerism-(Late-Renaissance)
4378
RGB
Mannerism-(Late-Renaissance)
4379
RGB
Mannerism-(Late-Renaissance)
4380
RGB
Mannerism-(Late-Renaissance)
4381
RGB
Mannerism-(Late-Renaissance)
4382
RGB
Mannerism-(Late-Renaissance)
4383
RGB
Mannerism-(Late-Renaissance)
4384
RGB
Mannerism-(Late-Renaissance)
4385
RGB
Mannerism-(Late-Renaissance)
4386
RGB
Mannerism-(Late-Renaissance)
4387
RGB
Mannerism-(Late-Renaissance)
4388
RGB
Mannerism-(Late-Renaissance)
4389
RGB
Mannerism-(Late-Renaissance)
4390
RGB
Mannerism-(Late-Renaissance)
4391
RGB
Mannerism-(Late-Renaissance)
4392
RGB
Mannerism-(Late-Renaissance)
4393
RGB
Mannerism-(Late-Renaissance)
4394
RGB
Mannerism-(Late-Renaissance)
4395
RGB
Mannerism-(Late-Renaissance)
4396
RGB
Mannerism-(Late-Renaissance)
4397
RGB
Mannerism-(Late-Renaissance)
4398
RGB
Mannerism-(Late-Renaissance)
4399
RGB
Mannerism-(Late-Renaissance)
4400
RGB
Mannerism-(Late-Renaissance)
4401
RGB
Mannerism-(L

Mannerism-(Late-Renaissance)
4592
RGB
Mannerism-(Late-Renaissance)
4593
RGB
Mannerism-(Late-Renaissance)
4594
RGB
Mannerism-(Late-Renaissance)
4595
RGB
Mannerism-(Late-Renaissance)
4596
RGB
Mannerism-(Late-Renaissance)
4597
RGB
Mannerism-(Late-Renaissance)
4598
RGB
Mannerism-(Late-Renaissance)
4599
RGB
Mannerism-(Late-Renaissance)
4600
RGB
Mannerism-(Late-Renaissance)
4601
RGB
Mannerism-(Late-Renaissance)
4602
RGB
Mannerism-(Late-Renaissance)
4603
RGB
Mannerism-(Late-Renaissance)
4604
RGB
Mannerism-(Late-Renaissance)
4605
RGB
Mannerism-(Late-Renaissance)
4606
RGB
Mannerism-(Late-Renaissance)
4607
RGB
Mannerism-(Late-Renaissance)
4608
RGB
Mannerism-(Late-Renaissance)
4609
RGB
Mannerism-(Late-Renaissance)
4610
RGB
Mannerism-(Late-Renaissance)
4611
RGB
Mannerism-(Late-Renaissance)
4612
RGB
Mannerism-(Late-Renaissance)
4613
RGB
Mannerism-(Late-Renaissance)
4614
RGB
Mannerism-(Late-Renaissance)
4615
RGB
Mannerism-(Late-Renaissance)
4616
RGB
Mannerism-(Late-Renaissance)
4617
RGB
Mannerism-(L

Mannerism-(Late-Renaissance)
4808
RGB
Mannerism-(Late-Renaissance)
4809
RGB
Mannerism-(Late-Renaissance)
4810
RGB
Mannerism-(Late-Renaissance)
4811
RGB
Mannerism-(Late-Renaissance)
4812
RGB
Mannerism-(Late-Renaissance)
4813
RGB
Mannerism-(Late-Renaissance)
4814
RGB
Mannerism-(Late-Renaissance)
4815
RGB
Mannerism-(Late-Renaissance)
4816
RGB
Mannerism-(Late-Renaissance)
4817
RGB
Mannerism-(Late-Renaissance)
4818
RGB
Mannerism-(Late-Renaissance)
4819
RGB
Mannerism-(Late-Renaissance)
4820
RGB
Mannerism-(Late-Renaissance)
4821
RGB
Mannerism-(Late-Renaissance)
4822
RGB
Mannerism-(Late-Renaissance)
4823
RGB
Mannerism-(Late-Renaissance)
4824
RGB
Mannerism-(Late-Renaissance)
4825
RGB
Mannerism-(Late-Renaissance)
4826
RGB
Mannerism-(Late-Renaissance)
4827
RGB
Mannerism-(Late-Renaissance)
4828
RGB
Mannerism-(Late-Renaissance)
4829
RGB
Mannerism-(Late-Renaissance)
4830
RGB
Mannerism-(Late-Renaissance)
4831
RGB
Mannerism-(Late-Renaissance)
4832
RGB
Mannerism-(Late-Renaissance)
4833
RGB
Mannerism-(L

Mannerism-(Late-Renaissance)
5024
RGB
Mannerism-(Late-Renaissance)
5025
RGB
Mannerism-(Late-Renaissance)
5026
RGB
Mannerism-(Late-Renaissance)
5027
RGB
Mannerism-(Late-Renaissance)
5028
RGB
Mannerism-(Late-Renaissance)
5029
RGB
Mannerism-(Late-Renaissance)
5030
RGB
Mannerism-(Late-Renaissance)
5031
RGB
Mannerism-(Late-Renaissance)
5032
RGB
Mannerism-(Late-Renaissance)
5033
RGB
Mannerism-(Late-Renaissance)
5034
RGB
Mannerism-(Late-Renaissance)
5035
RGB
Mannerism-(Late-Renaissance)
5036
RGB
Mannerism-(Late-Renaissance)
5037
RGB
Mannerism-(Late-Renaissance)
5038
RGBA
Mannerism-(Late-Renaissance)
5039
RGB
Mannerism-(Late-Renaissance)
5040
RGB
Mannerism-(Late-Renaissance)
5041
RGB
Mannerism-(Late-Renaissance)
5042
RGB
Mannerism-(Late-Renaissance)
5043
RGB
Mannerism-(Late-Renaissance)
5044
RGBA
Mannerism-(Late-Renaissance)
5045
RGB
Mannerism-(Late-Renaissance)
5046
RGB
Mannerism-(Late-Renaissance)
5047
RGB
Mannerism-(Late-Renaissance)
5048
RGB
Mannerism-(Late-Renaissance)
5049
RGB
Mannerism-

Mannerism-(Late-Renaissance)
5240
RGB
Mannerism-(Late-Renaissance)
5241
RGB
Mannerism-(Late-Renaissance)
5242
RGB
Mannerism-(Late-Renaissance)
5243
RGB
Mannerism-(Late-Renaissance)
5244
RGB
Mannerism-(Late-Renaissance)
5245
RGB
Mannerism-(Late-Renaissance)
5246
RGB
Mannerism-(Late-Renaissance)
5247
RGB
Mannerism-(Late-Renaissance)
5248
RGB
Mannerism-(Late-Renaissance)
5249
RGB
Mannerism-(Late-Renaissance)
5250
RGB
Mannerism-(Late-Renaissance)
5251
RGB
Mannerism-(Late-Renaissance)
5252
RGB
Mannerism-(Late-Renaissance)
5253
RGB
Mannerism-(Late-Renaissance)
5254
RGB
Mannerism-(Late-Renaissance)
5255
RGB
Mannerism-(Late-Renaissance)
5256
RGB
Mannerism-(Late-Renaissance)
5257
RGB
Mannerism-(Late-Renaissance)
5258
RGB
Mannerism-(Late-Renaissance)
5259
RGB
Mannerism-(Late-Renaissance)
5260
RGB
Mannerism-(Late-Renaissance)
5261
RGB
Mannerism-(Late-Renaissance)
5262
RGB
Mannerism-(Late-Renaissance)
5263
RGB
Mannerism-(Late-Renaissance)
5264
RGB
Mannerism-(Late-Renaissance)
5265
RGB
Mannerism-(L

Mannerism-(Late-Renaissance)
5456
RGB
Mannerism-(Late-Renaissance)
5457
RGB
Mannerism-(Late-Renaissance)
5458
RGB
Mannerism-(Late-Renaissance)
5459
RGB
Mannerism-(Late-Renaissance)
5460
RGB
Mannerism-(Late-Renaissance)
5461
RGB
Mannerism-(Late-Renaissance)
5462
RGB
Mannerism-(Late-Renaissance)
5463
RGB
Mannerism-(Late-Renaissance)
5464
RGB
Mannerism-(Late-Renaissance)
5465
RGB
Mannerism-(Late-Renaissance)
5466
RGB
Mannerism-(Late-Renaissance)
5467
RGB
Mannerism-(Late-Renaissance)
5468
RGB
Mannerism-(Late-Renaissance)
5469
RGB
Mannerism-(Late-Renaissance)
5470
RGB
Mannerism-(Late-Renaissance)
5471
RGB
Mannerism-(Late-Renaissance)
5472
RGB
Mannerism-(Late-Renaissance)
5473
RGB
Mannerism-(Late-Renaissance)
5474
RGB
Mannerism-(Late-Renaissance)
5475
RGB
Mannerism-(Late-Renaissance)
5476
RGB
Mannerism-(Late-Renaissance)
5477
RGB
Mannerism-(Late-Renaissance)
5478
RGB
Mannerism-(Late-Renaissance)
5479
RGB
Mannerism-(Late-Renaissance)
5480
RGB
Mannerism-(Late-Renaissance)
5481
RGB
Mannerism-(L

Northern-Renaissance
5710
RGB
Northern-Renaissance
5711
RGB
Northern-Renaissance
5712
RGB
Northern-Renaissance
5713
RGB
Northern-Renaissance
5714
RGB
Northern-Renaissance
5715
RGB
Northern-Renaissance
5716
RGB
Northern-Renaissance
5717
RGB
Northern-Renaissance
5718
RGB
Northern-Renaissance
5719
RGB
Northern-Renaissance
5720
RGB
Northern-Renaissance
5721
RGB
Northern-Renaissance
5722
RGB
Northern-Renaissance
5723
RGB
Northern-Renaissance
5724
RGB
Northern-Renaissance
5725
RGB
Northern-Renaissance
5726
RGB
Northern-Renaissance
5727
RGB
Northern-Renaissance
5728
RGB
Northern-Renaissance
5729
RGB
Northern-Renaissance
5730
RGB
Northern-Renaissance
5731
RGB
Northern-Renaissance
5732
RGB
Northern-Renaissance
5733
RGB
Northern-Renaissance
5734
RGB
Northern-Renaissance
5735
RGB
Northern-Renaissance
5736
RGB
Northern-Renaissance
5737
RGB
Northern-Renaissance
5738
RGB
Northern-Renaissance
5739
RGB
Northern-Renaissance
5740
RGB
Northern-Renaissance
5741
RGB
Northern-Renaissance
5742
RGB
Northern-R

Northern-Renaissance
5984
RGB
Northern-Renaissance
5985
RGB
Northern-Renaissance
5986
RGBA
Northern-Renaissance
5987
RGB
Northern-Renaissance
5988
RGB
Northern-Renaissance
5989
RGB
Northern-Renaissance
5990
RGB
Northern-Renaissance
5991
RGB
Northern-Renaissance
5992
RGB
Northern-Renaissance
5993
RGB
Northern-Renaissance
5994
RGB
Northern-Renaissance
5995
RGB
Northern-Renaissance
5996
RGB
Northern-Renaissance
5997
RGB
Northern-Renaissance
5998
RGB
Northern-Renaissance
5999
RGB
Northern-Renaissance
6000
RGB
Northern-Renaissance
6001
RGB
Northern-Renaissance
6002
RGB
Northern-Renaissance
6003
RGB
Northern-Renaissance
6004
RGB
Northern-Renaissance
6005
RGB
Northern-Renaissance
6006
RGB
Northern-Renaissance
6007
RGB
Northern-Renaissance
6008
RGB
Northern-Renaissance
6009
RGB
Northern-Renaissance
6010
RGB
Northern-Renaissance
6011
RGB
Northern-Renaissance
6012
RGB
Northern-Renaissance
6013
RGB
Northern-Renaissance
6014
RGB
Northern-Renaissance
6015
RGB
Northern-Renaissance
6016
RGB
Northern-

Northern-Renaissance
6258
RGB
Northern-Renaissance
6259
RGB
Northern-Renaissance
6260
RGB
Northern-Renaissance
6261
RGB
Northern-Renaissance
6262
RGB
Northern-Renaissance
6263
RGB
Northern-Renaissance
6264
RGB
Northern-Renaissance
6265
RGB
Northern-Renaissance
6266
RGB
Northern-Renaissance
6267
RGB
Northern-Renaissance
6268
RGB
Northern-Renaissance
6269
RGB
Northern-Renaissance
6270
RGB
Northern-Renaissance
6271
RGB
Northern-Renaissance
6272
RGB
Northern-Renaissance
6273
RGB
Northern-Renaissance
6274
RGB
Northern-Renaissance
6275
RGB
Northern-Renaissance
6276
RGB
Northern-Renaissance
6277
RGB
Northern-Renaissance
6278
RGB
Northern-Renaissance
6279
RGB
Northern-Renaissance
6280
RGB
Northern-Renaissance
6281
RGB
Northern-Renaissance
6282
RGB
Northern-Renaissance
6283
RGB
Northern-Renaissance
6284
RGB
Northern-Renaissance
6285
RGB
Northern-Renaissance
6286
RGB
Northern-Renaissance
6287
RGB
Northern-Renaissance
6288
RGBA
Northern-Renaissance
6289
RGB
Northern-Renaissance
6290
RGB
Northern-

Northern-Renaissance
6534
RGB
Northern-Renaissance
6535
RGB
Northern-Renaissance
6536
RGB
Northern-Renaissance
6537
RGB
Northern-Renaissance
6538
RGB
Northern-Renaissance
6539
RGB
Northern-Renaissance
6540
L
Northern-Renaissance
6541
L
Northern-Renaissance
6542
RGB
Northern-Renaissance
6543
L
Northern-Renaissance
6544
L
Northern-Renaissance
6545
RGB
Northern-Renaissance
6546
RGB
Northern-Renaissance
6547
RGB
Northern-Renaissance
6548
RGB
Northern-Renaissance
6549
RGB
Northern-Renaissance
6550
RGB
Northern-Renaissance
6551
RGB
Northern-Renaissance
6552
RGB
Northern-Renaissance
6553
RGB
Northern-Renaissance
6554
L
Northern-Renaissance
6555
RGB
Northern-Renaissance
6556
RGB
Northern-Renaissance
6557
RGB
Northern-Renaissance
6558
RGB
Northern-Renaissance
6559
RGB
Northern-Renaissance
6560
RGB
Northern-Renaissance
6561
RGB
Northern-Renaissance
6562
RGB
Northern-Renaissance
6563
RGB
Northern-Renaissance
6564
L
Northern-Renaissance
6565
RGB
Northern-Renaissance
6566
RGB
Northern-Renaissance
6

ConnectionError: HTTPSConnectionPool(host='uploads1.wikiart.org', port=443): Max retries exceeded with url: /images/albrecht-durer/upper-body-of-christ-1508.jpg (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7febf3015580>: Failed to establish a new connection: [Errno 60] Operation timed out'))

In [None]:
# Dominant colour of every image already saved under ../images/styles.
# ColorThief reads the file from disk, so nothing is downloaded here.
dom_colors = []
for i, style in clean_df['Style'].items():
    filepath = f'../images/styles/{style}/image_{i}.jpg'
    # A row whose image file is missing gets None instead of stopping the loop.
    if os.path.exists(filepath):
        dom_colors.append(ColorThief(filepath).get_color(quality=1))
    else:
        dom_colors.append(None)

# Assign the whole column in one step: this avoids chained indexing and works
# even when 'dom_color' does not exist yet. dtype=object keeps each (r, g, b)
# tuple as a single cell value.
clean_df['dom_color'] = pd.Series(dom_colors, index=clean_df.index, dtype=object)

In [87]:
clean_df

Unnamed: 0,Style,Artwork,Artist,Date,Link,dom_color
0,Early-Renaissance,Virgen Con El Niño,Álvaro Pires de Évora,47,https://uploads6.wikiart.org/00312/images/alva...,"(123, 82, 53)"
1,Early-Renaissance,A Virgem em Majestade,Álvaro Pires de Évora,48,https://uploads3.wikiart.org/00312/images/alva...,<colorthief.ColorThief object at 0x7fec42fd4eb0>
2,Early-Renaissance,"The Sacrifice of Isaac, bronze competition rel...",Filippo Brunelleschi,1401,https://uploads8.wikiart.org/images/filippo-br...,<colorthief.ColorThief object at 0x7fec42fd4eb0>
3,Early-Renaissance,Madonna with Child,Filippo Brunelleschi,1402,https://uploads2.wikiart.org/images/filippo-br...,<colorthief.ColorThief object at 0x7fec42fd4eb0>
4,Early-Renaissance,The sacrifice of Isaac,Lorenzo Ghiberti,1401-1402,https://uploads0.wikiart.org/00164/images/ghib...,<colorthief.ColorThief object at 0x7fec42fd4eb0>
...,...,...,...,...,...,...
89689,Ukiyo-e,Lucy's Mutation,Takato Yamamoto,2014,https://uploads3.wikiart.org/00147/images/taka...,<colorthief.ColorThief object at 0x7fec42fd4eb0>
89690,Ukiyo-e,Common Sense,Takato Yamamoto,2014,https://uploads3.wikiart.org/00147/images/taka...,<colorthief.ColorThief object at 0x7fec42fd4eb0>
89691,Ukiyo-e,Sacred Circulation,Takato Yamamoto,2015,https://uploads3.wikiart.org/00147/images/taka...,<colorthief.ColorThief object at 0x7fec42fd4eb0>
89692,Ukiyo-e,Ecstasy of Linked Circles,Takato Yamamoto,2015,https://uploads3.wikiart.org/00147/images/taka...,<colorthief.ColorThief object at 0x7fec42fd4eb0>


## Saving the Cleaned Dataframe

> We save the cleaned dataframe as `clean_art.csv` so that later notebooks can use it for EDA and modeling much more easily.

In [16]:
# Write the cleaned dataframe to CSV; index=False leaves the row index out of the file.
clean_df.to_csv('../data/clean_art.csv', index = False)