In [3]:
import numpy as np
import pandas as pd
import glob
import scipy.misc
import tensorflow as tf
import scipy
import os
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split

## Explore Bored Apes data

In [4]:
# Load the raw Bored Apes trait table and discard the 'Unnamed: 0' column
# (presumably an index that was written out when the CSV was created).
df_apes = pd.read_csv("bored_apes.csv").drop(columns='Unnamed: 0')
df_apes

Unnamed: 0,image_id,trait_type,value
0,0,Earring,Silver Hoop
1,0,Background,Orange
2,0,Fur,Robot
3,0,Clothes,Striped Tee
4,0,Mouth,Discomfort
...,...,...,...
58808,9999,Background,Purple
58809,9999,Mouth,Bored Unshaven
58810,9999,Earring,Gold Hoop
58811,9999,Fur,Gray


In [5]:
# Distinct trait categories present in the apes table, followed by their count.
ape_trait_types = df_apes["trait_type"].unique()
print(ape_trait_types, len(ape_trait_types))

['Earring' 'Background' 'Fur' 'Clothes' 'Mouth' 'Eyes' 'Hat'] 7


In [6]:
# Distinct trait values across all categories, followed by their count.
ape_values = df_apes["value"].unique()
print(ape_values, len(ape_values))

['Silver Hoop' 'Orange' 'Robot' 'Striped Tee' 'Discomfort' 'X Eyes' 'Grin'
 'Vietnam Jacket' 'Blue Beams' '3d' 'Bored Cigarette' "Sea Captain's Hat"
 'Aquamarine' 'Purple' 'Bored' 'Tongue Out' 'Bone Necklace' 'Cheetah'
 'Navy Striped Tee' 'Phoneme L' 'Party Hat 2' 'Golden Brown' 'Closed'
 'Blue' 'Bayc Flipped Brim' 'Brown' 'Army Green' 'Bayc T Red'
 'Dumbfounded' 'Tweed Suit' 'Cream' 'Crazy' 'Yellow' 'S&m Hat' 'Gray'
 'Stuntman Helmet' 'Angry' 'Wool Turtleneck' 'Beanie' 'Gold Stud' 'Zombie'
 'Silver Stud' 'Sleepy' 'Small Grin' "Seaman's Hat" 'Stunt Jacket'
 'Bayc Hat Red' 'Dmt' 'Eyepatch' 'Dark Brown' 'Laurel Wreath'
 'Bored Unshaven Cigarette' 'Smoking Jacket' 'Bloodshot' 'Black Holes T'
 'Wide Eyed' 'Black' 'Bone Tee' 'Coins' 'Police Motorcycle Helmet'
 'Diamond Stud' 'Tanktop' 'Black T' "Girl's Hair Pink" 'Horns'
 'Bored Unshaven' 'New Punk Blue' 'Bayc T Black' 'Commie Hat' 'Tan'
 'Cyborg' 'Bayc Hat Black' 'Rage' 'Bandolier' 'Phoneme Vuh' 'Phoneme  ooo'
 'Tuxedo Tee' 'Bored Pipe' 'T

In [7]:
# Work on a copy of the raw table and add a combined "<trait_type> <value>"
# label column, so that equal values under different trait types stay distinct.
df_apes_2 = df_apes.assign(label=df_apes["trait_type"] + " " + df_apes["value"])
df_apes_2

Unnamed: 0,image_id,trait_type,value,label
0,0,Earring,Silver Hoop,Earring Silver Hoop
1,0,Background,Orange,Background Orange
2,0,Fur,Robot,Fur Robot
3,0,Clothes,Striped Tee,Clothes Striped Tee
4,0,Mouth,Discomfort,Mouth Discomfort
...,...,...,...,...
58808,9999,Background,Purple,Background Purple
58809,9999,Mouth,Bored Unshaven,Mouth Bored Unshaven
58810,9999,Earring,Gold Hoop,Earring Gold Hoop
58811,9999,Fur,Gray,Fur Gray


In [8]:
# Number of distinct combined labels in the apes table.
ape_labels = df_apes_2["label"].unique()
len(ape_labels)

168

In [9]:
# Keep only image_id and the combined label; the two source columns are now
# redundant.
df_apes_2 = df_apes_2.drop(columns=['trait_type', 'value'])
df_apes_2

Unnamed: 0,image_id,label
0,0,Earring Silver Hoop
1,0,Background Orange
2,0,Fur Robot
3,0,Clothes Striped Tee
4,0,Mouth Discomfort
...,...,...
58808,9999,Background Purple
58809,9999,Mouth Bored Unshaven
58810,9999,Earring Gold Hoop
58811,9999,Fur Gray


In [10]:
# Persist the long-format (image_id, label) table without the row index.
df_apes_2.to_csv(path_or_buf="apes_modified.csv", index=False)

In [11]:
# Reshape from long to wide: one row per image_id, one column per trait_type,
# cells holding the trait value (combinations with no row are left empty/NaN).
df_apes_pivoted = (
    df_apes
    .pivot(index='image_id', columns='trait_type', values='value')
    .reset_index()
)
# Clear the leftover 'trait_type' name on the column axis.
df_apes_pivoted.columns.name = None
df_apes_pivoted

Unnamed: 0,image_id,Background,Clothes,Earring,Eyes,Fur,Hat,Mouth
0,0,Orange,Striped Tee,Silver Hoop,X Eyes,Robot,,Discomfort
1,1,Orange,Vietnam Jacket,,Blue Beams,Robot,,Grin
2,2,Aquamarine,,,3d,Robot,Sea Captain's Hat,Bored Cigarette
3,3,Purple,Bone Necklace,,Bored,Cheetah,,Tongue Out
4,4,Blue,Navy Striped Tee,,Closed,Golden Brown,Party Hat 2,Phoneme L
...,...,...,...,...,...,...,...,...
9991,9995,Gray,Smoking Jacket,,Closed,Pink,,Bored
9992,9996,New Punk Blue,Guayabera,Silver Hoop,3d,Dark Brown,,Dumbfounded
9993,9997,Purple,Sailor Shirt,,Bored,Black,Halo,Grin Multicolored
9994,9998,Yellow,Bayc T Red,,Heart,Brown,,Bored Unshaven Cigarette


## Get rid of images without info and info without an image

In [27]:
# Reconcile the pivoted metadata with the image folder so that every remaining
# image has a metadata row and every remaining row has an "<image_id>.jpg" file.
# WARNING: this cell permanently deletes image files that have no metadata row.
new_apes_df = df_apes_pivoted.copy()

# Build the id lookup once instead of recomputing unique() for every file.
known_image_ids = set(new_apes_df.image_id.tolist())

# Pass 1: delete pictures whose numeric id has no row in the table.
for file in os.listdir('images/bored_apes'):
    try:
        file_id = int(file.split(".")[0])
    except ValueError:
        # Entries such as hidden/system files have no numeric stem; leave them
        # alone instead of aborting the whole clean-up.
        print(f"Skipping non-image file {file}")
        continue
    if file_id not in known_image_ids:
        print(f"Removing picture {file}")
        os.remove("images/bored_apes/" + file)

# Pass 2: drop rows whose picture is missing. The directory is listed a single
# time (after the deletions above) rather than once per row.
files_on_disk = set(os.listdir('images/bored_apes'))
has_image = (new_apes_df.image_id.astype(str) + ".jpg").isin(files_on_disk)
for missing_id in new_apes_df.loc[~has_image, "image_id"]:
    print(f"Removing row ID {missing_id}")
new_apes_df = new_apes_df[has_image]
new_apes_df

Removing picture 2936.jpg
Removing picture 2978.jpg
Removing picture 2999.jpg
Removing picture 8325.jpg
Removing row ID 6913
Removing row ID 6916
Removing row ID 6929
Removing row ID 7639
Removing row ID 7641
Removing row ID 7643
Removing row ID 7646
Removing row ID 7648
Removing row ID 7649
Removing row ID 7650
Removing row ID 7656
Removing row ID 7657
Removing row ID 7662
Removing row ID 7664
Removing row ID 7668
Removing row ID 7670
Removing row ID 7674
Removing row ID 7795
Removing row ID 8076
Removing row ID 8077
Removing row ID 8082
Removing row ID 8083
Removing row ID 8086
Removing row ID 9000
Removing row ID 9451
Removing row ID 9454
Removing row ID 9464
Removing row ID 9845
Removing row ID 9847
Removing row ID 9852
Removing row ID 9855
Removing row ID 9859
Removing row ID 9865
Removing row ID 9866


Unnamed: 0,image_id,Background,Clothes,Earring,Eyes,Fur,Hat,Mouth
0,0,Orange,Striped Tee,Silver Hoop,X Eyes,Robot,,Discomfort
1,1,Orange,Vietnam Jacket,,Blue Beams,Robot,,Grin
2,2,Aquamarine,,,3d,Robot,Sea Captain's Hat,Bored Cigarette
3,3,Purple,Bone Necklace,,Bored,Cheetah,,Tongue Out
4,4,Blue,Navy Striped Tee,,Closed,Golden Brown,Party Hat 2,Phoneme L
...,...,...,...,...,...,...,...,...
9991,9995,Gray,Smoking Jacket,,Closed,Pink,,Bored
9992,9996,New Punk Blue,Guayabera,Silver Hoop,3d,Dark Brown,,Dumbfounded
9993,9997,Purple,Sailor Shirt,,Bored,Black,Halo,Grin Multicolored
9994,9998,Yellow,Bayc T Red,,Heart,Brown,,Bored Unshaven Cigarette


In [28]:
# Renumber the surviving rows consecutively from 0 and overwrite image_id with
# that new position. After this cell image_id no longer holds the original ids.
new_apes_df = (
    new_apes_df
    .reset_index(drop=True)
    .assign(image_id=lambda frame: frame.index)
)
new_apes_df

Unnamed: 0,image_id,Background,Clothes,Earring,Eyes,Fur,Hat,Mouth
0,0,Orange,Striped Tee,Silver Hoop,X Eyes,Robot,,Discomfort
1,1,Orange,Vietnam Jacket,,Blue Beams,Robot,,Grin
2,2,Aquamarine,,,3d,Robot,Sea Captain's Hat,Bored Cigarette
3,3,Purple,Bone Necklace,,Bored,Cheetah,,Tongue Out
4,4,Blue,Navy Striped Tee,,Closed,Golden Brown,Party Hat 2,Phoneme L
...,...,...,...,...,...,...,...,...
9957,9957,Gray,Smoking Jacket,,Closed,Pink,,Bored
9958,9958,New Punk Blue,Guayabera,Silver Hoop,3d,Dark Brown,,Dumbfounded
9959,9959,Purple,Sailor Shirt,,Bored,Black,Halo,Grin Multicolored
9960,9960,Yellow,Bayc T Red,,Heart,Brown,,Bored Unshaven Cigarette


In [29]:
# Persist the reconciled, renumbered wide table without the row index.
new_apes_df.to_csv(path_or_buf="apes_updated.csv", index=False)

In [30]:
# Peek at the first ten directory entries, in whatever order the OS returns them.
bored_ape_files = os.listdir('images/bored_apes')
bored_ape_files[:10]

['0.jpg',
 '1.jpg',
 '10.jpg',
 '100.jpg',
 '1000.jpg',
 '1001.jpg',
 '1002.jpg',
 '1003.jpg',
 '1004.jpg',
 '1005.jpg']

In [None]:
# Rename the images to consecutive numbers, visiting them in ascending numeric
# id order. os.listdir() returns names in arbitrary order (the preview cell
# shows '0', '1', '10', '100', ...), whereas the rows of new_apes_df were
# renumbered in ascending image_id order, so the files must be walked in that
# same order for the k-th file to correspond to the k-th row.
# Only "<digits>.jpg" entries are touched; anything else is left as is.
numbered_jpgs = [
    name for name in os.listdir('images/bored_apes')
    if name.endswith(".jpg") and name[:-len(".jpg")].isdecimal()
]

# NOTE(review): the large starting offset presumably keeps the new names from
# colliding with the existing ids while renaming in place - confirm, and
# confirm whether a second pass back to 0..N-1 is intended.
i = 1111111110
for file in sorted(numbered_jpgs, key=lambda name: int(name[:-len(".jpg")])):
    os.rename(f"images/bored_apes/{file}", f"images/bored_apes/{i}.jpg")
    i += 1

In [19]:
# df_apes_pivoted.to_csv("apes_pivoted.csv", index=False)

## Punks

In [21]:
# Load the raw CryptoPunks trait table and discard the 'Unnamed: 0' column
# (presumably an index that was written out when the CSV was created).
df_punks = pd.read_csv("cryptopunks.csv").drop(columns='Unnamed: 0')
df_punks

Unnamed: 0,image_id,trait_type,value
0,0,type,Female
1,0,accessory,Green Eye Shadow
2,0,accessory,Earring
3,0,accessory,Blonde Bob
4,0,accessory,3 attributes
...,...,...,...
33520,9998,accessory,Clown Eyes Green
33521,9998,type,Female
33522,9998,accessory,Black Lipstick
33523,9998,accessory,Wild White Hair


In [22]:
# Distinct trait categories present in the punks table, followed by their count.
punk_trait_types = df_punks["trait_type"].unique()
print(punk_trait_types, len(punk_trait_types))

['type' 'accessory'] 2


In [23]:
# Distinct trait values across all categories, followed by their count.
punk_values = df_punks["value"].unique()
print(punk_values, len(punk_values))

['Female' 'Green Eye Shadow' 'Earring' 'Blonde Bob' '3 attributes'
 'Mohawk' 'Blue Eye Shadow' '2 attributes' 'Tassle Hat' '1 attributes'
 'Male' 'Peak Spike' 'Nerd Glasses' 'Luxurious Beard' 'Mohawk Dark'
 'Frumpy Hair' 'Messy Hair' 'Black Lipstick' 'Regular Shades'
 'Purple Hair' 'Wild Hair' 'Vampire Hair' 'Cap Forward' 'Cap' 'Police Cap'
 'Stringy Hair' 'Eye Patch' 'Mohawk Thin' 'Clown Eyes Green' 'Fedora'
 'Silver Chain' 'Mole' 'Goat' 'Top Hat' 'Front Beard' 'Chinstrap'
 'Headband' 'Ape' 'Hot Lipstick' 'Clown Eyes Blue' 'Bandana' 'Big Shades'
 'Horned Rim Glasses' 'Mustache' 'Muttonchops' 'Small Shades'
 'Straight Hair Dark' 'Hoodie' 'Normal Beard Black' 'Classic Shades'
 'Orange Side' 'Crazy Hair' '4 attributes' 'Vape' 'Purple Eye Shadow'
 'Half Shaved' 'Purple Lipstick' 'VR' 'Normal Beard' 'Wild Blonde' 'Smile'
 'Big Beard' 'Handlebars' 'Front Beard Dark' 'Straight Hair Blonde'
 'Shadow Beard' 'Cigarette' 'Pink With Hat' 'Eye Mask' 'Cowboy Hat'
 'Shaved Head' 'Blonde Short' 'Clow

In [24]:
# Work on a copy of the raw table and add a combined "<trait_type> <value>"
# label column.
df_punks_2 = df_punks.assign(label=df_punks["trait_type"] + " " + df_punks["value"])
df_punks_2

Unnamed: 0,image_id,trait_type,value,label
0,0,type,Female,type Female
1,0,accessory,Green Eye Shadow,accessory Green Eye Shadow
2,0,accessory,Earring,accessory Earring
3,0,accessory,Blonde Bob,accessory Blonde Bob
4,0,accessory,3 attributes,accessory 3 attributes
...,...,...,...,...
33520,9998,accessory,Clown Eyes Green,accessory Clown Eyes Green
33521,9998,type,Female,type Female
33522,9998,accessory,Black Lipstick,accessory Black Lipstick
33523,9998,accessory,Wild White Hair,accessory Wild White Hair


In [25]:
# Number of distinct combined labels in the punks table.
punk_labels = df_punks_2["label"].unique()
len(punk_labels)

100

In [26]:
# Keep only image_id and the combined label; the two source columns are now
# redundant.
df_punks_2 = df_punks_2.drop(columns=['trait_type', 'value'])
df_punks_2

Unnamed: 0,image_id,label
0,0,type Female
1,0,accessory Green Eye Shadow
2,0,accessory Earring
3,0,accessory Blonde Bob
4,0,accessory 3 attributes
...,...,...
33520,9998,accessory Clown Eyes Green
33521,9998,type Female
33522,9998,accessory Black Lipstick
33523,9998,accessory Wild White Hair


In [28]:
# Persist the long-format (image_id, label) table without the row index.
df_punks_2.to_csv(path_or_buf="punks_modified.csv", index=False)

In [2]:
# df_punks_pivoted = df_punks.copy()
# df_punks_pivoted = df_punks_pivoted.pivot(index='image_id', columns='trait_type', values='value')\
#             .reset_index()
# df_punks_pivoted.columns.name=None
# df_punks_pivoted