In [1]:
# Uncomment the line below if vislearnlabpy is not installed yet.
# %pip install vislearnlabpy

Open this notebook in Google Colab: https://colab.research.google.com/github/vislearnlab/vllpy/blob/main/examples/drawings/mongo_retrieval.ipynb

In [1]:
import os
from pathlib import Path

from PIL import Image
import pymongo as pm
from dotenv import load_dotenv
load_dotenv()
from vislearnlabpy.extractions.drawings import MongoExtractor

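Fill out the fields below. Copy the `.env_template` file to a `.env` file and enter your username and password there; the extractor cell below reads the connection string from the `MONGO_CONNECTION` environment variable.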

In [2]:
# Credentials are not configured here: make sure the password is added to a .env file.
DATABASE_NAME = "kiddraw"
COLLECTION_NAME = "birch_run_v1"
# Output directory; often set to /Volumes/vislearnlab/experiments/drawings/data..
SAVE_DIR = Path.cwd() / "mongo_output"

Create the Mongo extractor for the database and collection configured above.

In [3]:
# Build the extractor from the connection string in the MONGO_CONNECTION
# environment variable plus the database name, collection name and output
# directory defined in the configuration cell.
extractor = MongoExtractor(
    os.getenv("MONGO_CONNECTION"),
    DATABASE_NAME,
    COLLECTION_NAME,
    SAVE_DIR,
)

The extractor's `collection` attribute can be used to query MongoDB directly with PyMongo.

In [4]:
# Fetch every document whose dataType is 'finalImage' through the extractor's
# collection handle. A single equality condition needs no `$and` wrapper, so
# the filter is written as a plain field match.
all_images = list(extractor.collection.find({'dataType': 'finalImage'}))
# Report how many final-image documents the collection holds.
print(f"Total final images in {extractor.collection.name}: {len(all_images)}")

Total final images in birch_run_v1: 339


Alternatively, the extractor can extract full datasets as needed.

In [5]:
# Run the extractor's image extraction.
# NOTE(review): the exact effect of transform_file=True is defined by
# MongoExtractor.extract_images — confirm against its documentation.
# NOTE(review): the run recorded in this notebook reports
# "Finished processing 0 image files" after 12 drawing sessions — confirm
# whether that is expected.
extractor.extract_images(transform_file=True)

Drawing sessions processed: 100%|██████████| 12/12 [00:00<00:00, 17.54it/s]

Finished processing 0 image files





In [6]:
# Run the extractor's audio extraction.
# NOTE: this can be slow — in the run recorded in this notebook the first of
# 34 sessions took about 17 s and the cell was then interrupted manually
# (KeyboardInterrupt).
extractor.extract_audio()

Processing 34 sessions for audio extraction


Knowledge trial sessions processed:   3%|▎         | 1/34 [00:17<09:31, 17.30s/it]


KeyboardInterrupt: 

Extracting strokes!

In [7]:
# Filter selecting the stroke records of one drawing category. The
# commented-out keys show how to narrow the query to one session or trial.
shark_query = {
    # sessionId is the unique identifier for each participant session
    # "sessionId": session_id,
    "dataType": "stroke",
    # Trial number is the index of the trial of the current participant
    # "trialNum": trial_num,
    # Categories usually include articles; all the categories are listed here:
    # https://github.com/brialorelle/museumkiosk/blob/83fff9902c2cdd4369540c1feed5c3a1be1c655f/code/experiment/trial-sequence.js#L37
    "category": "a shark",
}

# Sort the matches by startTrialTime, then materialise the cursor into a list.
stroke_cursor = extractor.collection.find(shark_query).sort("startTrialTime")
shark_strokes = list(stroke_cursor)

These are the SVG path strings for each stroke, which can be rendered as PNG files, etc.

In [8]:
# Collect the 'svg' field of every retrieved stroke record, in query order.
[stroke['svg'] for stroke in shark_strokes]

['M622.5,231.82813l-32,18l-49.99244,28.68017l-49.00756,30.31983l-60.1856,41.51491l-59.71726,42.33003l-61.26583,39.65756l-31.84391,17.73673l-32.9874,15.76077l-17.95937,7.18946l-19.05253,5.83181l-19.54658,3.32118l-19.44152,-0.34244l-12.67831,-3.62873l-13.39329,-7.1484l-13.49317,-9.9077l-12.97794,-11.90663l-11.8476,-13.1452l-10.10215,-13.6234l-7.7416,-13.34124l-4.76593,-12.2987l-2.11588,-16.76115l2.34812,-15.78462l6.07881,-14.72333l9.07621,-13.57729l11.3403,-12.34649l12.87109,-11.03094l27.40135,-17.77619l31.18265,-13.72312l32.49244,-8.41429l33.37125,-3.69229l33.81906,0.44288l33.83589,3.99124l33.42172,6.95277l32.57657,9.32747l31.30042,11.11535l79.02773,32.9624l38.02547,19.06229l36.9468,21.97532l32.93205,20.94567l34.06795,19.05433l5,2l2.71192,1.06351l5.33016,1.97997l5.30912,1.70823l2.6488,0.24828l1.08214,-5.40113l-0.27598,-7.09277l-0.92906,-7.09582l-0.8771,-5.41028l-4.79488,-21.90835l-6.68986,-21.76181l-13.51525,-43.32984l-11,-47l-2,-23l-1,-1',
 'M203.5,383.82813l1.22574,-15.54484l6.04769,-