## setup

In [67]:
import pandas as pd
import cv2
import os

import time

import requests
import json
import urllib

## Basic Requests Checkout

MET guideline here:<br>
https://metmuseum.github.io/

In [68]:
# checkout all departments
url = "https://collectionapi.metmuseum.org/public/collection/v1/departments"
r = requests.get(url)
r

<Response [200]>

In [69]:
r.text

'{"departments":[{"departmentId":1,"displayName":"American Decorative Arts"},{"departmentId":3,"displayName":"Ancient Near Eastern Art"},{"departmentId":4,"displayName":"Arms and Armor"},{"departmentId":5,"displayName":"Arts of Africa, Oceania, and the Americas"},{"departmentId":6,"displayName":"Asian Art"},{"departmentId":7,"displayName":"The Cloisters"},{"departmentId":8,"displayName":"The Costume Institute"},{"departmentId":9,"displayName":"Drawings and Prints"},{"departmentId":10,"displayName":"Egyptian Art"},{"departmentId":11,"displayName":"European Paintings"},{"departmentId":12,"displayName":"European Sculpture and Decorative Arts"},{"departmentId":13,"displayName":"Greek and Roman Art"},{"departmentId":14,"displayName":"Islamic Art"},{"departmentId":15,"displayName":"The Robert Lehman Collection"},{"departmentId":16,"displayName":"The Libraries"},{"departmentId":17,"displayName":"Medieval Art"},{"departmentId":18,"displayName":"Musical Instruments"},{"departmentId":19,"display

In [70]:
# search a specific department
url = "https://collectionapi.metmuseum.org/public/collection/v1/objects?departmentIds=8"
r = requests.get(url)
r

<Response [200]>

In [71]:
costume_objectsIDs = r.json()['objectIDs']
len(costume_objectsIDs)

31550

In [72]:
# Search by a topic
url = "https://collectionapi.metmuseum.org/public/collection/v1/search?q=robe"
r = requests.get(url)
r

<Response [200]>

In [76]:
r.json().keys()

dict_keys(['total', 'objectIDs'])

In [79]:
# checkout a specific object by object id
url = "https://collectionapi.metmuseum.org/public/collection/v1/objects/53713"
r = requests.get(url)
r

<Response [200]>

## prototype for dataframes concat function

In [80]:
# to get one object info
x = pd.DataFrame.from_dict(r.json(), orient='index').T
x

Unnamed: 0,objectID,isHighlight,accessionNumber,accessionYear,isPublicDomain,primaryImage,primaryImageSmall,additionalImages,constituents,department,...,classification,rightsAndReproduction,linkResource,metadataDate,repository,objectURL,tags,objectWikidata_URL,isTimelineWork,GalleryNumber
0,53713,False,32.30.10,1932,True,https://images.metmuseum.org/CRDImages/as/orig...,https://images.metmuseum.org/CRDImages/as/web-...,[https://images.metmuseum.org/CRDImages/as/ori...,,Asian Art,...,Costumes-Embroidered,,,2022-10-20T04:55:06.267Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,"[{'term': 'Bats', 'AAT_URL': 'http://vocab.get...",,False,


In [81]:
url = "https://collectionapi.metmuseum.org/public/collection/v1/objects/79667"
r = requests.get(url)
r
y = pd.DataFrame.from_dict(r.json(), orient='index').T
y

Unnamed: 0,objectID,isHighlight,accessionNumber,accessionYear,isPublicDomain,primaryImage,primaryImageSmall,additionalImages,constituents,department,...,classification,rightsAndReproduction,linkResource,metadataDate,repository,objectURL,tags,objectWikidata_URL,isTimelineWork,GalleryNumber
0,79667,False,"1997.478.8a, b",1997,False,,,[],"[{'constituentID': 161959, 'role': 'Designer',...",Costume Institute,...,,,,2022-10-05T04:56:22.043Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,,,False,


In [82]:
pd.concat([x,y])

Unnamed: 0,objectID,isHighlight,accessionNumber,accessionYear,isPublicDomain,primaryImage,primaryImageSmall,additionalImages,constituents,department,...,classification,rightsAndReproduction,linkResource,metadataDate,repository,objectURL,tags,objectWikidata_URL,isTimelineWork,GalleryNumber
0,53713,False,32.30.10,1932,True,https://images.metmuseum.org/CRDImages/as/orig...,https://images.metmuseum.org/CRDImages/as/web-...,[https://images.metmuseum.org/CRDImages/as/ori...,,Asian Art,...,Costumes-Embroidered,,,2022-10-20T04:55:06.267Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,"[{'term': 'Bats', 'AAT_URL': 'http://vocab.get...",,False,
0,79667,False,"1997.478.8a, b",1997,False,,,[],"[{'constituentID': 161959, 'role': 'Designer',...",Costume Institute,...,,,,2022-10-05T04:56:22.043Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,,,False,


## scrape: The Costume Institute Department

In [83]:
type(costume_objectsIDs) # list of objects ID in the costume department created earlier

list

In [41]:
urlstring = 'https://collectionapi.metmuseum.org/public/collection/v1/objects/'

for id in costume_objectsIDs:
    thisurl = thisurl = urlstring + str(id)
    r = requests.get(thisurl)
    thisdf = pd.DataFrame.from_dict(r.json(), orient='index').T

    df = pd.concat([df,thisdf])

# took 64m53s

In [88]:
df.to_csv(r'scrapped_data/MET_costume_dept.csv', index = False) 

In [89]:
df = pd.read_csv('scrapped_data/MET_costume_dept.csv') 

## def function for scrapping

### function: get_objectid_list from a certain topic

In [4]:
# define a function to get the object id list
# of a certain topic
def get_objectid_list(keyword):
    root = "https://collectionapi.metmuseum.org/public/collection/v1/search?q="
    target_url = root + keyword
    r = requests.get(target_url)
    idlist = r.json()['objectIDs']
    print(f'there are {len(idlist)} object(s) about {keyword}')

    return idlist

### function: scrapping

In [5]:
def scrapping(keyword):
   start = time.time()

   target_columns = ['objectID', 'isHighlight', 'accessionNumber', 'accessionYear',
       'isPublicDomain', 'primaryImage', 'primaryImageSmall',
       'additionalImages', 'constituents', 'department', 'objectName', 'title',
       'culture', 'period', 'dynasty', 'reign', 'portfolio', 'artistRole',
       'artistPrefix', 'artistDisplayName', 'artistDisplayBio', 'artistSuffix',
       'artistAlphaSort', 'artistNationality', 'artistBeginDate',
       'artistEndDate', 'artistGender', 'artistWikidata_URL', 'artistULAN_URL',
       'objectDate', 'objectBeginDate', 'objectEndDate', 'medium',
       'dimensions', 'measurements', 'creditLine', 'geographyType', 'city',
       'state', 'county', 'country', 'region', 'subregion', 'locale', 'locus',
       'excavation', 'river', 'classification', 'rightsAndReproduction',
       'linkResource', 'metadataDate', 'repository', 'objectURL', 'tags',
       'objectWikidata_URL', 'isTimelineWork', 'GalleryNumber']
       
       
   # initiate an empty df
   df = pd.DataFrame(columns=target_columns)

   idlist = get_objectid_list(keyword)

   urlstring = 'https://collectionapi.metmuseum.org/public/collection/v1/objects/'

   for id in idlist:
      thisurl = thisurl = urlstring + str(id)
      r = requests.get(thisurl)
      thisdf = pd.DataFrame.from_dict(r.json(), orient='index').T
      df = pd.concat([df,thisdf])
   
   end = time.time()
   print(f'{end-start} secs slipped...')
   
   return df



### function: scrapping using an existing list

In [91]:
def scrapping_by_idlist(idlist):
   start = time.time()

   target_columns = ['objectID', 'isHighlight', 'accessionNumber', 'accessionYear',
       'isPublicDomain', 'primaryImage', 'primaryImageSmall',
       'additionalImages', 'constituents', 'department', 'objectName', 'title',
       'culture', 'period', 'dynasty', 'reign', 'portfolio', 'artistRole',
       'artistPrefix', 'artistDisplayName', 'artistDisplayBio', 'artistSuffix',
       'artistAlphaSort', 'artistNationality', 'artistBeginDate',
       'artistEndDate', 'artistGender', 'artistWikidata_URL', 'artistULAN_URL',
       'objectDate', 'objectBeginDate', 'objectEndDate', 'medium',
       'dimensions', 'measurements', 'creditLine', 'geographyType', 'city',
       'state', 'county', 'country', 'region', 'subregion', 'locale', 'locus',
       'excavation', 'river', 'classification', 'rightsAndReproduction',
       'linkResource', 'metadataDate', 'repository', 'objectURL', 'tags',
       'objectWikidata_URL', 'isTimelineWork', 'GalleryNumber']
       
       
   # initiate an empty df
   df = pd.DataFrame(columns=target_columns)

   urlstring = 'https://collectionapi.metmuseum.org/public/collection/v1/objects/'

   for id in idlist:
      thisurl = thisurl = urlstring + str(id)
      r = requests.get(thisurl)
      thisdf = pd.DataFrame.from_dict(r.json(), orient='index').T
      df = pd.concat([df,thisdf])
   
   end = time.time()
   print(f'{end-start} secs slipped...')
   
   return df

### function: download_img

In [98]:
def download_img(df,dir='testing/'):
    start = time.time()
    print(f'{df.shape[0]} images will be downloaded.')

    name = df['objectID']
    url = df['primaryImage']

    name = list(name)
    url = list(url)

    error_id = []
    error_n = 0

    for n,u in zip(name,url):
        filename = dir + str(n) + '.jpg'
        try:
            z = urllib.request.urlopen(u)
            output = open(filename, 'wb')
            output.write(z.read())
            output.close()
        except:
            error_id.append(n)
            print(f'error id {n}')
            error_n += 1
            print(f'{error_n} errors in total')
            continue


    end = time.time()
    print(f'{end - start} secs slipped...')

## scrape: robe

this is a test batch, for detecting filtering issues

found issues:
- unrelated topics: may be fixed by filtering classification, single word topic
- black & white

### download dataframe

In [125]:
df_robe = scrapping('robe')

there are 2470 object(s) about robe
685.2680084705353 secs slipped...


In [126]:
df_robe

Unnamed: 0,objectID,isHighlight,accessionNumber,accessionYear,isPublicDomain,primaryImage,primaryImageSmall,additionalImages,constituents,department,...,rightsAndReproduction,linkResource,metadataDate,repository,objectURL,tags,objectWikidata_URL,isTimelineWork,GalleryNumber,message
0,53715,False,43.119,1943,True,https://images.metmuseum.org/CRDImages/as/orig...,https://images.metmuseum.org/CRDImages/as/web-...,[],,Asian Art,...,,,2022-10-20T04:55:06.267Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,"[{'term': 'Bats', 'AAT_URL': 'http://vocab.get...",,False,214,
0,450958,False,48.187.667,1948,False,,,[],,Islamic Art,...,,,2021-11-29T10:04:10.363Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,"[{'term': 'Robes', 'AAT_URL': 'http://vocab.ge...",,False,,
0,751497,True,2017.717,2017,True,https://images.metmuseum.org/CRDImages/ad/orig...,https://images.metmuseum.org/CRDImages/ad/web-...,[],,The American Wing,...,,,2022-06-02T12:28:57.13Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,,https://www.wikidata.org/wiki/Q104413096,False,,
0,85512,False,1975.344.4,1975,False,,,[],,Islamic Art,...,,,2020-12-30T04:37:58.547Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,"[{'term': 'Robes', 'AAT_URL': 'http://vocab.ge...",,False,,
0,227775,False,54.203,1954,False,,,[],,European Sculpture and Decorative Arts,...,,,2022-02-09T04:42:08.947Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,"[{'term': 'Robes', 'AAT_URL': 'http://vocab.ge...",,False,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,352478,False,38.52,1938,True,https://images.metmuseum.org/CRDImages/dp/orig...,https://images.metmuseum.org/CRDImages/dp/web-...,[https://images.metmuseum.org/CRDImages/dp/ori...,"[{'constituentID': 83628, 'role': 'Editor', 'n...",Drawings and Prints,...,,,2022-10-20T04:55:06.267Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,"[{'term': 'Men', 'AAT_URL': 'http://vocab.gett...",,True,,
0,368861,False,25.62.32r(a),1925,False,,,[],"[{'constituentID': 166181, 'role': 'Artist', '...",Drawings and Prints,...,,,2021-08-20T04:36:13.79Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,,,False,,
0,361192,False,20.81.3.167,1920,False,,,[],"[{'constituentID': 212471, 'role': 'Etcher', '...",Drawings and Prints,...,,,2021-08-20T04:36:13.79Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,,,False,,
0,770912,False,18.17.1-45,1918,False,,,[],"[{'constituentID': 95757, 'role': 'Artist', 'n...",Drawings and Prints,...,,,2021-06-05T04:45:37.2Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,,,False,,


In [134]:
df_robe.to_csv(r'scrapped_data/MET_robe.csv', index = False)

### prepare to download image

In [102]:
df_robe = pd.read_csv('scrapped_data/MET_robe.csv')

In [103]:
df_robe.shape

(2470, 58)

In [104]:
condition1 = (df_robe['isPublicDomain'] == True) # select the ones is public domain
condition2 = (df_robe['primaryImage'] != '') # select the ones has primary images

df_sel = df_robe.loc[condition1 & condition2]

In [105]:
df_sel['classification'].unique()

array(['Costumes-Embroidered', nan, 'Main dress-Womenswear', 'Main dress',
       'Outerwear-Womenswear', 'Main dress-Menswear',
       'Night and Dressing Wear', 'Costumes', 'Textiles-Costumes',
       'Costumes-Tapestries', 'Ceramics', 'Textiles-Embroidered',
       'Main dress-Childrenswear', 'Crèche', 'Paintings',
       'Costumes-Woven', 'Textiles-Woven', 'Textiles-Velvets',
       'Sculpture', 'Hide-Costumes', 'Codices', 'Textiles-Laces',
       'Glass-Stained', 'Prints', 'Photographs', 'Textiles-Tapestries',
       'Drawings', 'Lacquer', 'Furniture', 'Costumes-Velvets', 'Textiles',
       'Metalwork-Lead', 'Textiles-Painted and Dyed',
       'Textiles-Painted and Printed', 'Calligraphy',
       'Textiles-Dyed and Embroidered', 'Outerwear-Menswear', 'Netsuke',
       'Illustrated Books', 'Lapidary Work-Sardonyx', 'Metalwork-Silver',
       'Metalwork', 'Jade', 'Enamels-Basse taille', 'Inrō',
       'Textiles-Dyed', 'Idiophone-Shaken-crotal bell', 'Sculpture-Stone',
       'Gold a

In [106]:
sel = ['Costumes-Embroidered', 'Main dress-Womenswear', 'Main dress',
       'Outerwear-Womenswear', 'Main dress-Menswear',
       'Night and Dressing Wear', 'Costumes', 'Textiles-Costumes',
       'Costumes-Tapestries', 'Ceramics', 'Textiles-Embroidered',
       'Main dress-Childrenswear', 
       'Costumes-Woven', 'Costumes-Velvets', 'Outerwear-Menswear']

df_sel[df_sel['classification'].isin(sel)]

Unnamed: 0,objectID,isHighlight,accessionNumber,accessionYear,isPublicDomain,primaryImage,primaryImageSmall,additionalImages,constituents,department,...,rightsAndReproduction,linkResource,metadataDate,repository,objectURL,tags,objectWikidata_URL,isTimelineWork,GalleryNumber,message
0,53715.0,False,43.119,1943.0,True,https://images.metmuseum.org/CRDImages/as/orig...,https://images.metmuseum.org/CRDImages/as/web-...,[],,Asian Art,...,,,2022-10-20T04:55:06.267Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,"[{'term': 'Bats', 'AAT_URL': 'http://vocab.get...",,False,214.0,
21,86241.0,False,C.I.69.5,1969.0,True,https://images.metmuseum.org/CRDImages/is/orig...,https://images.metmuseum.org/CRDImages/is/web-...,['https://images.metmuseum.org/CRDImages/is/or...,,Islamic Art,...,,,2021-04-20T04:40:27.437Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,"[{'term': 'Robes', 'AAT_URL': 'http://vocab.ge...",,False,,
22,85550.0,False,C.I.54.15,1954.0,True,https://images.metmuseum.org/CRDImages/is/orig...,https://images.metmuseum.org/CRDImages/is/web-...,['https://images.metmuseum.org/CRDImages/is/or...,,Islamic Art,...,,,2021-04-20T04:40:27.437Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,"[{'term': 'Robes', 'AAT_URL': 'http://vocab.ge...",,False,,
23,85503.0,False,1975.388.1,1975.0,True,https://images.metmuseum.org/CRDImages/is/orig...,https://images.metmuseum.org/CRDImages/is/web-...,['https://images.metmuseum.org/CRDImages/is/or...,,Islamic Art,...,,,2021-04-06T04:41:04.967Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,"[{'term': 'Robes', 'AAT_URL': 'http://vocab.ge...",,False,,
28,126846.0,False,C.I.41.86.7,1941.0,True,https://images.metmuseum.org/CRDImages/is/orig...,https://images.metmuseum.org/CRDImages/is/web-...,[],,Islamic Art,...,,,2022-10-20T04:55:06.267Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,"[{'term': 'Robes', 'AAT_URL': 'http://vocab.ge...",,False,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2185,448246.0,False,"29.106a, b",1929.0,True,https://images.metmuseum.org/CRDImages/is/orig...,https://images.metmuseum.org/CRDImages/is/web-...,['https://images.metmuseum.org/CRDImages/is/or...,,Islamic Art,...,,,2022-09-09T04:54:13.663Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,"[{'term': 'Men', 'AAT_URL': 'http://vocab.gett...",,False,,
2225,229059.0,False,64.101.1363,1964.0,True,https://images.metmuseum.org/CRDImages/es/orig...,https://images.metmuseum.org/CRDImages/es/web-...,['https://images.metmuseum.org/CRDImages/es/or...,,European Sculpture and Decorative Arts,...,,,2021-06-29T04:39:38.347Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,"[{'term': 'Faces', 'AAT_URL': 'http://vocab.ge...",,True,,
2262,447958.0,False,27.13.9,1927.0,True,https://images.metmuseum.org/CRDImages/is/orig...,https://images.metmuseum.org/CRDImages/is/web-...,['https://images.metmuseum.org/CRDImages/is/or...,,Islamic Art,...,,,2022-10-20T04:55:06.267Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,,,True,,
2284,452400.0,False,1972.120.3,1972.0,True,https://images.metmuseum.org/CRDImages/is/orig...,https://images.metmuseum.org/CRDImages/is/web-...,['https://images.metmuseum.org/CRDImages/is/or...,,Islamic Art,...,,,2022-10-20T04:55:06.267Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,,,True,,


In [107]:
df_sel['department'].value_counts()

Asian Art                                 781
European Paintings                        159
Costume Institute                         149
Drawings and Prints                       126
Islamic Art                               121
European Sculpture and Decorative Arts     79
Ancient Near Eastern Art                   54
Medieval Art                               30
The Cloisters                              28
Photographs                                26
Robert Lehman Collection                   24
The Michael C. Rockefeller Wing            11
The American Wing                          11
Egyptian Art                               10
Arms and Armor                              8
Greek and Roman Art                         4
Musical Instruments                         3
Name: department, dtype: int64

In [108]:
df_sel['objectID'] = df_sel['objectID'].astype('int')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_sel['objectID'] = df_sel['objectID'].astype('int')


In [109]:
df_sel[df_sel['objectName'].str.contains('robe')==True]

Unnamed: 0,objectID,isHighlight,accessionNumber,accessionYear,isPublicDomain,primaryImage,primaryImageSmall,additionalImages,constituents,department,...,rightsAndReproduction,linkResource,metadataDate,repository,objectURL,tags,objectWikidata_URL,isTimelineWork,GalleryNumber,message
2,751497,True,2017.717,2017.0,True,https://images.metmuseum.org/CRDImages/ad/orig...,https://images.metmuseum.org/CRDImages/ad/web-...,[],,The American Wing,...,,,2022-06-02T12:28:57.13Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,,https://www.wikidata.org/wiki/Q104413096,False,,
115,69855,False,46.187.6,1946.0,True,https://images.metmuseum.org/CRDImages/as/orig...,https://images.metmuseum.org/CRDImages/as/web-...,[],,Asian Art,...,,,2022-10-20T04:55:06.267Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,"[{'term': 'Peonies', 'AAT_URL': 'http://vocab....",,False,,
120,68577,False,30.75.98,1930.0,True,https://images.metmuseum.org/CRDImages/as/orig...,https://images.metmuseum.org/CRDImages/as/web-...,['https://images.metmuseum.org/CRDImages/as/or...,,Asian Art,...,,,2022-09-13T04:54:15.93Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,,,False,213.0,
132,229782,False,1983.574.2a–c,1983.0,True,https://images.metmuseum.org/CRDImages/es/orig...,https://images.metmuseum.org/CRDImages/es/web-...,['https://images.metmuseum.org/CRDImages/es/or...,,European Sculpture and Decorative Arts,...,,,2020-09-16T18:35:19.457Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,"[{'term': 'Robes', 'AAT_URL': 'http://vocab.ge...",,False,,
134,454043,False,2003.416a–e,2001.0,True,https://images.metmuseum.org/CRDImages/is/orig...,https://images.metmuseum.org/CRDImages/is/web-...,['https://images.metmuseum.org/CRDImages/is/or...,,Islamic Art,...,,,2022-10-20T04:55:06.267Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,,,True,460.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
803,70233,False,58.97.2,1958.0,True,https://images.metmuseum.org/CRDImages/as/orig...,https://images.metmuseum.org/CRDImages/as/web-...,['https://images.metmuseum.org/CRDImages/as/or...,,Asian Art,...,,,2022-10-20T04:55:06.267Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,,,False,,
892,70397,False,65.217,1965.0,True,https://images.metmuseum.org/CRDImages/as/orig...,https://images.metmuseum.org/CRDImages/as/web-...,[],,Asian Art,...,,,2022-04-22T05:09:02.033Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,"[{'term': 'Dragons', 'AAT_URL': 'http://vocab....",,False,,
901,53619,False,45.123.4,1945.0,True,https://images.metmuseum.org/CRDImages/as/orig...,https://images.metmuseum.org/CRDImages/as/web-...,['https://images.metmuseum.org/CRDImages/as/or...,,Asian Art,...,,,2022-10-20T04:55:06.267Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,"[{'term': 'Costumes', 'AAT_URL': 'http://vocab...",,False,,
1028,61806,False,2001.428.23,2001.0,True,https://images.metmuseum.org/CRDImages/as/orig...,https://images.metmuseum.org/CRDImages/as/web-...,[],,Asian Art,...,,,2022-10-20T04:55:06.267Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,"[{'term': 'Leaves', 'AAT_URL': 'http://vocab.g...",,False,,


In [110]:
df_sel[df_sel['title'].str.contains('robe') == True]

Unnamed: 0,objectID,isHighlight,accessionNumber,accessionYear,isPublicDomain,primaryImage,primaryImageSmall,additionalImages,constituents,department,...,rightsAndReproduction,linkResource,metadataDate,repository,objectURL,tags,objectWikidata_URL,isTimelineWork,GalleryNumber,message
0,53715,False,43.119,1943.0,True,https://images.metmuseum.org/CRDImages/as/orig...,https://images.metmuseum.org/CRDImages/as/web-...,[],,Asian Art,...,,,2022-10-20T04:55:06.267Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,"[{'term': 'Bats', 'AAT_URL': 'http://vocab.get...",,False,214.0,
2,751497,True,2017.717,2017.0,True,https://images.metmuseum.org/CRDImages/ad/orig...,https://images.metmuseum.org/CRDImages/ad/web-...,[],,The American Wing,...,,,2022-06-02T12:28:57.13Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,,https://www.wikidata.org/wiki/Q104413096,False,,
116,69900,False,49.32.125,1949.0,True,https://images.metmuseum.org/CRDImages/as/orig...,https://images.metmuseum.org/CRDImages/as/web-...,['https://images.metmuseum.org/CRDImages/as/or...,,Asian Art,...,,,2022-10-20T04:55:06.267Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,"[{'term': 'Cranes', 'AAT_URL': 'http://vocab.g...",,False,213.0,
120,68577,False,30.75.98,1930.0,True,https://images.metmuseum.org/CRDImages/as/orig...,https://images.metmuseum.org/CRDImages/as/web-...,['https://images.metmuseum.org/CRDImages/as/or...,,Asian Art,...,,,2022-09-13T04:54:15.93Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,,,False,213.0,
132,229782,False,1983.574.2a–c,1983.0,True,https://images.metmuseum.org/CRDImages/es/orig...,https://images.metmuseum.org/CRDImages/es/web-...,['https://images.metmuseum.org/CRDImages/es/or...,,European Sculpture and Decorative Arts,...,,,2020-09-16T18:35:19.457Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,"[{'term': 'Robes', 'AAT_URL': 'http://vocab.ge...",,False,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1346,55960,False,1972.18,1972.0,True,https://images.metmuseum.org/CRDImages/as/orig...,https://images.metmuseum.org/CRDImages/as/web-...,['https://images.metmuseum.org/CRDImages/as/or...,,Asian Art,...,,,2021-12-01T04:35:59Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,"[{'term': 'Books', 'AAT_URL': 'http://vocab.ge...",,False,,
1348,816191,False,2018.853.4a–c,2018.0,True,https://images.metmuseum.org/CRDImages/as/orig...,https://images.metmuseum.org/CRDImages/as/web-...,['https://images.metmuseum.org/CRDImages/as/or...,"[{'constituentID': 37502, 'role': 'Artist', 'n...",Asian Art,...,,,2022-06-02T12:28:57.13Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,,https://www.wikidata.org/wiki/Q78610277,False,,
1685,675966,False,2015.741.9,2015.0,True,https://images.metmuseum.org/CRDImages/es/orig...,https://images.metmuseum.org/CRDImages/es/web-...,['https://images.metmuseum.org/CRDImages/es/or...,,European Sculpture and Decorative Arts,...,,,2022-05-20T05:08:55.267Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,"[{'term': 'Men', 'AAT_URL': 'http://vocab.gett...",,False,,
1686,675971,False,2015.741.14,2015.0,True,https://images.metmuseum.org/CRDImages/es/orig...,https://images.metmuseum.org/CRDImages/es/web-...,['https://images.metmuseum.org/CRDImages/es/or...,,European Sculpture and Decorative Arts,...,,,2022-05-20T05:08:55.267Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,"[{'term': 'Men', 'AAT_URL': 'http://vocab.gett...",,False,,


In [111]:
df_sel = df_sel[df_sel['primaryImage'].isna() != True]
df_sel.shape

(1623, 58)

In [112]:
df_sel

Unnamed: 0,objectID,isHighlight,accessionNumber,accessionYear,isPublicDomain,primaryImage,primaryImageSmall,additionalImages,constituents,department,...,rightsAndReproduction,linkResource,metadataDate,repository,objectURL,tags,objectWikidata_URL,isTimelineWork,GalleryNumber,message
0,53715,False,43.119,1943.0,True,https://images.metmuseum.org/CRDImages/as/orig...,https://images.metmuseum.org/CRDImages/as/web-...,[],,Asian Art,...,,,2022-10-20T04:55:06.267Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,"[{'term': 'Bats', 'AAT_URL': 'http://vocab.get...",,False,214.0,
2,751497,True,2017.717,2017.0,True,https://images.metmuseum.org/CRDImages/ad/orig...,https://images.metmuseum.org/CRDImages/ad/web-...,[],,The American Wing,...,,,2022-06-02T12:28:57.13Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,,https://www.wikidata.org/wiki/Q104413096,False,,
7,108103,False,1975.227.9,1975.0,True,https://images.metmuseum.org/CRDImages/ci/orig...,https://images.metmuseum.org/CRDImages/ci/web-...,['https://images.metmuseum.org/CRDImages/ci/or...,,Costume Institute,...,,,2020-03-02T21:50:01.377Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,,,False,,
8,102549,False,45.73,1945.0,True,https://images.metmuseum.org/CRDImages/ci/orig...,https://images.metmuseum.org/CRDImages/ci/web-...,[],,Costume Institute,...,,,2020-03-02T21:50:01.377Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,,,False,,
9,107996,False,1972.78.3,1972.0,True,https://images.metmuseum.org/CRDImages/ci/orig...,https://images.metmuseum.org/CRDImages/ci/web-...,['https://images.metmuseum.org/CRDImages/ci/or...,,Costume Institute,...,,,2020-03-02T21:50:01.377Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,,,False,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2459,436797,False,41.190.19,1941.0,True,https://images.metmuseum.org/CRDImages/ep/orig...,https://images.metmuseum.org/CRDImages/ep/web-...,['https://images.metmuseum.org/CRDImages/ep/or...,"[{'constituentID': 162015, 'role': 'Artist', '...",European Paintings,...,,,2022-11-02T04:54:43.4Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,"[{'term': 'Virgin Mary', 'AAT_URL': 'http://vo...",https://www.wikidata.org/wiki/Q19904858,False,,
2460,53233,False,2015.300.67,2015.0,True,https://images.metmuseum.org/CRDImages/as/orig...,https://images.metmuseum.org/CRDImages/as/web-...,['https://images.metmuseum.org/CRDImages/as/or...,"[{'constituentID': 11724, 'role': 'Artist', 'n...",Asian Art,...,,,2022-10-20T04:55:06.267Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,"[{'term': 'Musical Instruments', 'AAT_URL': 'h...",https://www.wikidata.org/wiki/Q78760038,False,,
2461,435838,False,05.39.2,1905.0,True,https://images.metmuseum.org/CRDImages/ep/orig...,https://images.metmuseum.org/CRDImages/ep/web-...,['https://images.metmuseum.org/CRDImages/ep/or...,"[{'constituentID': 161740, 'role': 'Artist', '...",European Paintings,...,,,2022-10-31T04:54:57.077Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,"[{'term': 'Interiors', 'AAT_URL': 'http://voca...",https://www.wikidata.org/wiki/Q19911530,False,,
2462,435640,False,30.95.256,1930.0,True,https://images.metmuseum.org/CRDImages/ep/orig...,https://images.metmuseum.org/CRDImages/ep/web-...,[],"[{'constituentID': 161668, 'role': 'Artist', '...",European Paintings,...,,,2022-10-31T04:54:57.077Z,"Metropolitan Museum of Art, New York, NY",https://www.metmuseum.org/art/collection/searc...,"[{'term': 'Madonna and Child', 'AAT_URL': 'htt...",https://www.wikidata.org/wiki/Q19911532,False,,


### download 

In [113]:
df_sel.shape

(1623, 58)

In [236]:
download_img(df_sel,'robe/')

start!
error id 45088
1 errors in total
error id 69010
2 errors in total
error id 85436
3 errors in total
error id 68295
4 errors in total
error id 86895
5 errors in total
error id 81687
6 errors in total
error id 84299
7 errors in total
error id 86178
8 errors in total
error id 38123
9 errors in total
error id 437028
10 errors in total
error id 61833
11 errors in total
error id 468576
12 errors in total
error id 82630
13 errors in total
error id 105411
14 errors in total
error id 84436
15 errors in total
error id 84414
16 errors in total
error id 82158
17 errors in total
error id 436908
18 errors in total
3763.592482328415 secs slipped...


## Asian Art + keywords

### download dataframe

In [3]:
df_asian = pd.read_csv('tran_file\df_asian.csv') # this csv generated in notebook 'data wrangling'
df_asian.shape

(682, 54)

In [101]:
idlist = df_asian['Object ID'].to_list()
print(f'{len(idlist)} objects')

682 objects


In [13]:
df_asian_api = scrapping_by_idlist(idlist)

111.81104588508606 secs slipped...


In [20]:
df_asian_api.to_csv('scrapped_data/MET_Asian_Art_Keywords.csv',index=False)

In [94]:
df_asian_api = pd.read_csv('scrapped_data/MET_Asian_Art_Keywords.csv')

### download images

In [96]:
df_asian_api = df_asian_api[df_asian_api['primaryImage'] != '']

In [97]:
df_asian_api.shape

(471, 57)

In [18]:
download_img(df_asian_api,'asian_art/')

start!
error id 45088
1 errors in total
error id 68295
2 errors in total
error id 69010
3 errors in total
error id 70630
4 errors in total
error id 88642
5 errors in total
error id 97475
6 errors in total
error id 126433
7 errors in total
598.0307347774506 secs slipped...


## Full Selection

In [116]:
df_full_selection = pd.read_csv('tran_file/df_full_selection.csv')

In [117]:
df_full_selection.shape

(14292, 54)

In [118]:
idlist = df_full_selection['Object ID'].to_list()
print(f'{len(idlist)} objects')

14292 objects


### download dataframe

In [None]:
df_full_selection_api = scrapping_by_idlist(idlist)

In [None]:
df_full_selection_api.to_csv('scrapped_data/MET_full_selection_Keywords.csv',index=False)

In [None]:
df_full_selection_api = pd.read_csv('scrapped_data/MET_full_selection_Keywords.csv')

### download images

In [None]:
df_full_selection_api = df_full_selection_api[df_full_selection_api['primaryImage'] != '']

In [None]:
df_full_selection_api.shape

In [None]:
download_img(df_full_selection_api,'asian_art/')