## Configuration

In [None]:
# Action to perform on the Data Node: DELETE, DISPLAY, INSERT, or UPDATE

action = "INSERT"

# Data Node configuration: the node type and the collection it belongs to

datatype = "RawData"  # string: "RawData" or "ProcessedData"
collection = ""  # string: the _id of the target collection

# Optional Data Node properties -- more may be added, but every property
# must also be declared in the schema below

relationships = list()  # list
OCR = True  # Boolean
rights = str()  # string

# Schema used to create the manifest: one entry per property, giving the
# name of the configuration variable to read and the type it is declared as
schema = [
    dict(name="path", type="string"),
    dict(name="relationships", type="list"),
    dict(name="OCR", type="bool"),
    dict(name="rights", type="string"),
]

## Basic Setup

In [None]:
# Import dependencies

import pymongo
from pymongo import MongoClient

# Connect to the local MongoDB server and bind the `Corpus` collection of the
# `we1s` database to a module-level name used by the API methods

client = MongoClient('mongodb://localhost:27017')
db = client['we1s']
Corpus = db['Corpus']

# Derive the Data Node's `_id` (its datatype) and its `path`, which has the
# form ",Corpus,<collection>,"
_id = datatype
path = ','.join(('', 'Corpus', collection, ''))

# Expose the module-level names (including the configuration values) as a
# dict so that properties can be looked up by name
opts = globals()

## API Methods

In [None]:
def display_node(_id, path):
    """
    Prints the Data Node manifest matching `_id` and `path`.

    Both values are used together as the query filter on the `Corpus`
    collection. If no document matches, a "does not exist" message is
    printed instead. Database errors are reported separately so that a
    connection problem is not mistaken for a missing node.
    """
    try:
        # Query once, with `path` as part of the filter. Passing it as a
        # second positional argument would make it the projection.
        manifest = Corpus.find_one({'_id': _id, 'path': path})
    except pymongo.errors.PyMongoError as err:
        print('Database Error: The data node could not be retrieved: ' + str(err))
        return
    if manifest is None:
        print('The data node does not exist in the specified collection.')
    else:
        print(manifest)


def delete_data_node(_id, path):
    """
    Deletes the Data Node matching `_id` and `path` from the `Corpus` collection.

    Prints a confirmation when a document was removed, a "does not exist"
    message when nothing matched, and an error message when the database
    operation itself failed. The messages use the module-level `datatype`
    and `collection` configuration values.
    """
    try:
        # `delete_one` is the PyMongo call for removing a single document;
        # its result reports how many documents were actually deleted.
        result = Corpus.delete_one({'_id': _id, 'path': path})
    except pymongo.errors.PyMongoError:
        print('Unknown Error: The ' + datatype + ' manifest could not be deleted from the ' + collection + ' collection.')
        return
    if result.deleted_count:
        print('A ' + datatype + ' manifest has been deleted from the ' + collection + ' collection.')
    else:
        print('The Data Node does not exist in the specified collection.')
        
        
def get_properties(opts, schema):
    """
    Collects the configured value of every property named in the schema.

    For each schema entry, the value stored in `opts` under the entry's
    `name` is passed to `validate_datatype` together with the entry's
    `type`, and is then added to the result under that name.
    (`validate_datatype` is defined outside this section; what it does on
    a mismatch is not visible here.)

    Returns a dict mapping property names to their configured values.
    """
    collected = {}
    for field in schema:
        name = field['name']
        value = opts[name]
        validate_datatype(name, value, field['type'])
        collected.update({name: value})
    return collected


def insert_data_node(_id, opts, schema):
    """
    Inserts a Data Node based on the user configuration.

    The node is looked up by `_id` and the module-level `path`. If it
    already exists, nothing is written and the user is told to use the
    UPDATE action. Otherwise the properties named in `schema` are read
    from `opts` and inserted as a new document.

    Errors raised while collecting the properties propagate to the caller.
    Database errors are reported with a printed message.
    """
    try:
        if Corpus.find_one({'_id': _id, 'path': path}) is not None:
            print('The Data Node already exists in the specified collection. If you wish to update it, set `action` to "UPDATE".')
            return
        properties = get_properties(opts, schema)
        # Store the node under the generated `_id`. Without this MongoDB
        # assigns its own ObjectId and later lookups by `_id` never match.
        properties['_id'] = _id
        Corpus.insert_one(properties)
    except pymongo.errors.PyMongoError as err:
        print('Unknown Error: The ' + datatype + ' manifest could not be inserted in the ' + collection + ' collection: ' + str(err))
        return
    print('A ' + datatype + ' manifest has been inserted in the ' + collection + ' collection.')

def update_data_node(_id, opts, schema):
    """
    Updates a Data Node manifest based on the user configuration.

    The node is looked up by `_id` and the module-level `path`. If it does
    not exist, a message is printed and nothing is written. Otherwise the
    properties named in `schema` are read from `opts` and applied with
    `$set`; `_id` and `path` are excluded so that the node's identity and
    location are never modified.

    Errors raised while collecting the properties propagate to the caller.
    Database errors are reported with a printed message rather than being
    mistaken for a missing node.
    """
    selector = {'_id': _id, 'path': path}
    try:
        if Corpus.find_one(selector) is None:
            print('The Data Node does not exist in the specified collection.')
            return
        properties = get_properties(opts, schema)
        properties.pop('_id', None)
        properties.pop('path', None)
        Corpus.update_one(selector, {"$set": properties}, upsert=False)
    except pymongo.errors.PyMongoError as err:
        print('Unknown Error: The manifest for `_id` ' + str(_id) + ' could not be updated: ' + str(err))
        return
    print('The manifest for `_id` ' + str(_id) + ' has been updated.')

## Execute Action

In [None]:
# Dispatch on the configured `action`. Any value other than the four
# documented actions falls through to the reminder message.
if action == 'INSERT':
    insert_data_node(_id, opts, schema)
elif action == 'UPDATE':
    update_data_node(_id, opts, schema)
elif action == 'DISPLAY':
    display_node(_id, path)
elif action == 'DELETE':
    delete_data_node(_id, path)
else:
    print('Please configure the `action` variable at the top of the notebook.')