In [1]:
# Import dependencies
from pymongo import MongoClient
from pprint import pprint
import pandas as pd

In [2]:
# Create an instance of MongoClient
mongo = MongoClient(port=27017)

# assign the met database to a variable name
db = mongo['met']

# assign the collection to a variable
artifacts = db['artifacts']

In [3]:
# Build the aggregation pipeline
# Write a match query to find only the documents about artifacts that
# have a classification where "Wood" is the value.
match_query = {'$match': {'classification': {'$regex': "Wood"}}}

# Write an aggregation query that counts the number of documents and finds the maximum height,
# grouped by "classification"
group_query = {'$group': {'_id': "$classification", 
                          'count': { '$sum': 1 },
                          'max_height': { '$max': '$measurements.elementMeasurements.Height' }}}

# Create a dictionary that will allow the pipeline to sort by count in descending order, 
# then sort by classification in alphabetical order
sort_values = {'$sort': { 'count': -1, '_id': 1 }}

# Put the pipeline together
pipeline = [match_query, group_query, sort_values]

In [4]:
# Run the pipeline through the aggregate method and save the results to a variable
results = list(artifacts.aggregate(pipeline))

In [5]:
# Print the number of classifications in the result
print("Number of classifications in result: ", len(results))

Number of classifications in result:  14


In [6]:
# Print the first 10 results
pprint(results[0:10])

[{'_id': 'Wood-Sculpture', 'count': 217, 'max_height': [274.3205]},
 {'_id': 'Wood-Implements', 'count': 27, 'max_height': [181.9279]},
 {'_id': 'Wood-Architectural', 'count': 11, 'max_height': [289.5606]},
 {'_id': 'Wood-Containers', 'count': 11, 'max_height': [67.9451]},
 {'_id': 'Wood-Furniture', 'count': 10, 'max_height': [99.06]},
 {'_id': 'Wood-Musical Instruments', 'count': 10, 'max_height': [157.48032]},
 {'_id': 'Wood-Ornaments', 'count': 5, 'max_height': [30.48006]},
 {'_id': 'Wood-Costumes', 'count': 3, 'max_height': [114.935]},
 {'_id': 'Wood', 'count': 1, 'max_height': [88.90018]},
 {'_id': 'Wood-Paintings', 'count': 1, 'max_height': [70.00889]}]


In [7]:
# Convert mongo result to Pandas DataFrame
result_df = pd.DataFrame(results)

# Print out the length of the DataFrame
print("Rows in DataFrame: ", len(result_df))

# Display the first 10 rows of the DataFrame
result_df.head(10)

Rows in DataFrame:  14


Unnamed: 0,_id,count,max_height
0,Wood-Sculpture,217,[274.3205]
1,Wood-Implements,27,[181.9279]
2,Wood-Architectural,11,[289.5606]
3,Wood-Containers,11,[67.9451]
4,Wood-Furniture,10,[99.06]
5,Wood-Musical Instruments,10,[157.48032]
6,Wood-Ornaments,5,[30.48006]
7,Wood-Costumes,3,[114.935]
8,Wood,1,[88.90018]
9,Wood-Paintings,1,[70.00889]


Data Source: [The Metropolitan Museum of Art](https://www.metmuseum.org/) (2022). The Metropolitan Museum of Art Collection API https://metmuseum.github.io/. Licensed under the [Creative Commons 0 License](https://creativecommons.org/publicdomain/zero/1.0/).<br />
Accessed Oct 3, 2022. Data collected from departmentId=5 ("Arts of Africa, Oceania, and the Americas") and search string "animal".