In [1]:
# Import dependencies
from pymongo import MongoClient
from pprint import pprint
import pandas as pd

In [2]:
# Create an instance of MongoClient
mongo = MongoClient(port=27017)

# assign the met database to a variable name
db = mongo['met']

# assign the collection to a variable
artifacts = db['artifacts']

* Write a query that:

    * Uses `find()` to find documents about artifacts that come from the "Maya" culture and returns the following fields: `accessionNumber`, `accessionYear`, `classification`, `country`, `department`, `measurements.elementMeasurements.Height`, `measurements.elementMeasurements.Width`, `measurements.elementMeasurements.Depth`, `medium`, `title`,`objectURL`.

    * Uses `sort()` to sort by the artifact's height.

    * Uses `limit()` to limit the number of results to 5.

In [3]:
# Query
query = {'culture': "Maya"}
fields = {'accessionNumber': 1, 'accessionYear': 1, 'classification': 1, 'country': 1, 
          'department': 1, 'measurements.elementMeasurements.Height': 1, 
          'measurements.elementMeasurements.Width': 1, 'measurements.elementMeasurements.Depth': 1, 
          'medium': 1, 'title': 1, 'objectURL': 1}
sort = [('measurements.elementMeasurements.Height', -1)]
limit = 5

# Cast the results as a list and save them to a variable
results = list(artifacts.find(query, fields).sort(sort).limit(limit))

# Pretty print the results
pprint(results)

[{'_id': ObjectId('63c885a8435617897adf530b'),
  'accessionNumber': '66.181',
  'accessionYear': '1966',
  'classification': 'Stone-Sculpture',
  'country': 'Mexico',
  'department': 'The Michael C. Rockefeller Wing',
  'measurements': [{'elementMeasurements': {'Depth': 50.800102,
                                            'Height': 215.58293,
                                            'Width': 60.96012}}],
  'medium': 'Limestone',
  'objectURL': 'https://www.metmuseum.org/art/collection/search/309404',
  'title': 'Monumental figure (Chahk)'},
 {'_id': ObjectId('63c885a8435617897adf50cd'),
  'accessionNumber': '1979.206.1047',
  'accessionYear': '1979',
  'classification': 'Stone-Sculpture',
  'country': 'Guatemala or Mexico',
  'department': 'The Michael C. Rockefeller Wing',
  'measurements': [{'elementMeasurements': {'Depth': 6.99,
                                            'Height': 88.9,
                                            'Width': 87.63}}],
  'medium': 'Limestone, pain

In [4]:
# Convert results to DataFrame
top_maya_artifacts_by_height_df = pd.DataFrame(results)
top_maya_artifacts_by_height_df

Unnamed: 0,_id,accessionNumber,accessionYear,department,title,medium,measurements,country,classification,objectURL
0,63c885a8435617897adf530b,66.181,1966,The Michael C. Rockefeller Wing,Monumental figure (Chahk),Limestone,"[{'elementMeasurements': {'Depth': 50.800102, ...",Mexico,Stone-Sculpture,https://www.metmuseum.org/art/collection/searc...
1,63c885a8435617897adf50cd,1979.206.1047,1979,The Michael C. Rockefeller Wing,Relief with Enthroned Ruler,"Limestone, paint","[{'elementMeasurements': {'Depth': 6.99, 'Heig...",Guatemala or Mexico,Stone-Sculpture,https://www.metmuseum.org/art/collection/searc...
2,63c885a8435617897adf509f,"1999.484.1a, b",1999,The Michael C. Rockefeller Wing,"Censer, Seated King",Ceramic,"[{'elementMeasurements': {'Depth': 22.860046, ...",Guatemala,Ceramics-Containers,https://www.metmuseum.org/art/collection/searc...
3,63c885a8435617897adf522e,1978.412.99,1978,The Michael C. Rockefeller Wing,Censer Support,Ceramic,"[{'elementMeasurements': {'Depth': 34.93}}, {'...",Mexico,Ceramics-Sculpture,https://www.metmuseum.org/art/collection/searc...
4,63c885a8435617897adf5353,"1982.394a, b",1982,The Michael C. Rockefeller Wing,Seated Figure Censer (Incensario),Ceramic,[{'elementMeasurements': {'Height': 37.465}}],Guatemala,Ceramics-Containers,https://www.metmuseum.org/art/collection/searc...


In [5]:
# Build the aggregation pipeline
# Write a match query to find only the documents about artifacts that
# have a classification where "Sculpture" is contained the value, and
# have a width greater than or equal to 10cm and less than 50cm, and
# have a height greater than or equal to 20cm and less than 60cm.
match_query = {'$match': {'classification': {'$regex': "Sculpture"},
                          'measurements.elementMeasurements.Width': {'$gte': 10, '$lt': 50},
                          'measurements.elementMeasurements.Height': {'$gte': 20, '$lt': 60}
                         }
              }

# Write an aggregation query that counts the number of documents, grouped by "classification" and "culture"
group_query = {'$group': {'_id': {"classification": "$classification",
                                  "culture": "$culture"}, 
                          'count': { '$sum': 1 }
                         }
              }

# Create a dictionary that will allow the pipeline to sort by count in descending order
sort_values = {'$sort': { 'count': -1 }}

# Put the pipeline together
pipeline = [match_query, group_query, sort_values]

In [6]:
# Run the pipeline through the aggregate method, cast the results as a list, and save the results to a variable
results = list(artifacts.aggregate(pipeline))

In [7]:
# Print the number of rows in the result
print("Number of rows in result: ", len(results))

Number of rows in result:  63


In [8]:
# Print the first 10 results
pprint(results[0:10])

[{'_id': {'classification': 'Wood-Sculpture', 'culture': 'Baule peoples'},
  'count': 6},
 {'_id': {'classification': 'Stone-Sculpture', 'culture': 'Veracruz'},
  'count': 5},
 {'_id': {'classification': 'Wood-Sculpture', 'culture': 'Bamana peoples'},
  'count': 5},
 {'_id': {'classification': 'Wood-Sculpture', 'culture': 'Senufo peoples'},
  'count': 4},
 {'_id': {'classification': 'Stone-Sculpture', 'culture': 'Aztec'}, 'count': 4},
 {'_id': {'classification': 'Wood-Sculpture', 'culture': ''}, 'count': 4},
 {'_id': {'classification': 'Wood-Sculpture', 'culture': 'Kwele peoples'},
  'count': 3},
 {'_id': {'classification': 'Wood-Sculpture', 'culture': 'Dogon peoples'},
  'count': 3},
 {'_id': {'classification': 'Stone-Sculpture', 'culture': 'Maya'}, 'count': 3},
 {'_id': {'classification': 'Wood-Sculpture', 'culture': 'Bwa peoples'},
  'count': 3}]


In [9]:
# Extract the fields from the _id so they're in separate columns in a Pandas DataFrame
aggregated_df = pd.json_normalize(results)
aggregated_df.head()

Unnamed: 0,count,_id.classification,_id.culture
0,6,Wood-Sculpture,Baule peoples
1,5,Stone-Sculpture,Veracruz
2,5,Wood-Sculpture,Bamana peoples
3,4,Wood-Sculpture,Senufo peoples
4,4,Stone-Sculpture,Aztec


In [10]:
# Rename the columns
aggregated_df = aggregated_df.rename(columns={"count": "number of artifacts",
                                              "_id.classification": "classification",
                                              "_id.culture": "culture"})
aggregated_df.head()

Unnamed: 0,number of artifacts,classification,culture
0,6,Wood-Sculpture,Baule peoples
1,5,Stone-Sculpture,Veracruz
2,5,Wood-Sculpture,Bamana peoples
3,4,Wood-Sculpture,Senufo peoples
4,4,Stone-Sculpture,Aztec


In [11]:
# Reorder the columns
aggregated_df = aggregated_df[["classification", "culture", "number of artifacts"]]

# Print the first 10 rows of the DataFrame
aggregated_df.head(10)

Unnamed: 0,classification,culture,number of artifacts
0,Wood-Sculpture,Baule peoples,6
1,Stone-Sculpture,Veracruz,5
2,Wood-Sculpture,Bamana peoples,5
3,Wood-Sculpture,Senufo peoples,4
4,Stone-Sculpture,Aztec,4
5,Wood-Sculpture,,4
6,Wood-Sculpture,Kwele peoples,3
7,Wood-Sculpture,Dogon peoples,3
8,Stone-Sculpture,Maya,3
9,Wood-Sculpture,Bwa peoples,3


Data Source: [The Metropolitan Museum of Art](https://www.metmuseum.org/) (2022). The Metropolitan Museum of Art Collection API https://metmuseum.github.io/. Licensed under the [Creative Commons 0 License](https://creativecommons.org/publicdomain/zero/1.0/).<br />
Accessed Oct 3, 2022. Data collected from departmentId=5 ("Arts of Africa, Oceania, and the Americas") and search string "animal".