# A Basic Analysis of the Artworks at The Museum of Modern Art

The data shown here is a sampling of the data from the MoMA dataset([link](https://www.kaggle.com/momanyc/museum-collection)). The data present the names and genre of the artworks and the details of the creators (gender, date of birth, date of death, year of artwork creation)

In this particular analysis we look at the artists and their age when they created their artwork. We also take a peek at the distribution of artist gender in the dataset.

In [15]:
from csv import reader

opened_file = open('MoMA_Artworks.csv',encoding="utf8")
read_file = reader(opened_file)
moma = list(read_file)
moma = moma[1:]

In [16]:

for row in moma:
    nationality = row[2]
    gender = row[5]
    nationality = nationality.replace('(','')
    nationality = nationality.replace(')','')
    gender = gender.replace('(','')
    gender = gender.replace(')','')
    row[2] = nationality
    row[5] = gender
    
print(moma[:5])

[['Ferdinandsbrücke Project, Vienna, Austria, Elevation, preliminary version', 'Otto Wagner', '6210', '(Austrian, 1841–1918)', '(Austrian)', '1841', '(1918)', '(Male)', '1896', 'Ink and cut-and-pasted painted pages on paper', '19 1/8 x 66 1/2" (48.6 x 168.9 cm)', 'Fractional and promised gift of Jo Carole and Ronald S. Lauder', '885.1996', 'Architecture', 'Architecture & Design', '4/9/1996', 'Y', '2', 'http://www.moma.org/collection/works/2', 'http://www.moma.org/media/W1siZiIsIjU5NDA1Il0sWyJwIiwiY29udmVydCIsIi1yZXNpemUgMzAweDMwMFx1MDAzZSJdXQ.jpg?sha=137b8455b1ec6167', '', '', '', '48.6', '', '', '168.9', '', ''], ['City of Music, National Superior Conservatory of Music and Dance, Paris, France, View from interior courtyard', 'Christian de Portzamparc', '7470', '(French, born 1944)', '(French)', '1944', '(0)', '(Male)', '1987', 'Paint and colored pencil on print', '16 x 11 3/4" (40.6 x 29.8 cm)', 'Gift of the architect in honor of Lily Auchincloss', '1.1995', 'Architecture', 'Architect

In [5]:
## 1. String Capitalization ##

for row in moma:
    gender = row[5]
    nationality = row[2]
    gender = gender.title()
    nationality = nationality.title()
    
    if not gender:
        gender = "Gender Unknown/Other"
        
    row[5] = gender
    
    if not nationality:
        nationality = 'Nationality Unknown'
        
    row[2] = nationality    

In [6]:
## 2. Errors During Data Cleaning ##

def clean_and_convert(date):
    # check that we don't have an empty string
    if date != "":
        date = date.replace("(", "")
        date = date.replace(")", "")
        date = int(date)
    return date

for row in moma:
    begindate = row[3]
    enddate = row[4]
    begindate = clean_and_convert(begindate)
    enddate = clean_and_convert(enddate)
    row[3] = begindate
    row[4] = enddate

bad_chars = ["(",")","c","C",".","s","'", " "]

def strip_characters(string):
    for char in bad_chars:
        string = string.replace(char,"")
    return string

def process_date(string):
    if '-' in string:
        temp = string.split('-')
        temp1 = int(temp[0])
        temp2 = int(temp[1])
        date = (temp1+temp2)/2
        return round(date)
    
    date = int(string)
    
    return date

for row in moma:
    date = row[6]
    date = strip_characters(date)
    date = process_date(date)
    row[6] = date


In [12]:
ages = []

for row in moma:
    date = row[6]
    birth = row[3]
    if type(birth) == int:
        age = date - birth
    else:
        age = 0
    
    ages.append(age)
    
final_ages = []
for each in ages:
    if each>20:
        final_age = each
        final_ages.append(final_age)
    else:
        final_age = "Unknown"
        final_ages.append(final_age)

## 3. Converting Ages to Decades ##

decades = []

for each in final_ages:
    if each == "Unknown":
        decade = each
    else:
        decade = str(each)
        decade = decade[:-1]
        decade = decade + "0s"
    
    decades.append(decade)

In [8]:
## 4. Summarizing the Decade Data ##

decade_frequency = {}

for each in decades:
    if each not in decade_frequency:
        decade_frequency[each] = 1
    else:
        decade_frequency[each] += 1
        
print(decade_frequency)

{'30s': 4722, '60s': 1357, '70s': 559, '40s': 4081, '50s': 2434, '20s': 1856, 'Unknown': 1097, '90s': 253, '80s': 364, '100s': 3, '110s': 3}


In [13]:
## 5. Inserting Variables Into Strings ##

artist = "Pablo Picasso"
birth_year = 1881

template = "{}'s birth year is {}".format(artist, birth_year)
print(template)

## 6. Creating an Artist Frequency Table ##

artist_freq = {}

for row in moma:
    artist = row[1]
    if artist not in artist_freq:
        artist_freq[artist] = 1
    else:
        artist_freq[artist] += 1
        

## 7. Creating an Artist Summary Function ##

def artist_summary(artist_name):
    paintings_no = artist_freq[artist_name]
    
    template = "There are {} artworks by {} in the data set".format(paintings_no, artist_name)
    
    print(template)

## Example

artist = "Otto Wagner"
artist_summary(artist)

Pablo Picasso's birth year is 1881


KeyError: 'Otto Wagner'

In [10]:
## 8. Challenge: Summarizing Artwork Gender Data ##

gender_freq = {}

for row in moma:
    gender = row[5]
    if gender in gender_freq:
        gender_freq[gender] += 1
    else:
        gender_freq[gender] = 1
        
for key, value in gender_freq.items():
    template = "There are {:,} artworks by {} artists".format(value, key)
    print(template)

There are 2,443 artworks by Female artists
There are 13,492 artworks by Male artists
There are 794 artworks by Gender Unknown/Other artists
