## Configuration

In [109]:
### RUN THIS CELL TO BEGIN CONFIGURATION ###
# IPython `%run` magic: executes the companion notebook named below.
# NOTE(review): presumably that notebook creates the `config` form object that
# the "Basic Setup" cell reads via `config.values` -- confirm.
%run manage_data_nodes_config.ipynb

In [None]:
### LEGACY CONFIGURATION CELL -- SKIP THIS IF USING THE CONFIGURATION FORM ABOVE ###

# Configure Action: DELETE, DISPLAY, INSERT, or UPDATE

action = 'INSERT'
namespace = 'we1sv1.1'

# Data Node configuration: RawData or ProcessedData

datatype = 'RawData' # string: "RawData" or "ProcessedData"
collection = '' # string: The _id of the target collection

# Optional Data Node Properties -- Additional properties may be added but must be defined in the schema
# Every property named in the schema for the chosen datatype needs a value of
# the declared type here; empty strings and empty lists are left out of the manifest.

relationships = [] # list (RawData schema only)
OCR = True # Boolean (RawData schema only)
rights = "" # string (RawData schema only)
processes = [] # list (ProcessedData schema only)
title = "" # string
altTitle = "" # string
date = [] # list
description = "" # string
label = "" # string
notes = [] # list

# Build the manifest schema for the configured datatype.
# Each entry pairs a property name with the type its value must have; the
# RawData list is used when `datatype` is 'RawData', the other list otherwise.
schema = [
    {"name": field_name, "type": field_type}
    for field_name, field_type in (
        (
            ("namespace", "string"),
            ("path", "string"),
            ("relationships", "list"),
            ("OCR", "bool"),
            ("rights", "string"),
            ("title", "string"),
            ("altTitle", "string"),
            ("date", "list"),
            ("description", "string"),
            ("label", "string"),
            ("notes", "list"),
        )
        if datatype == 'RawData'
        else (
            ("namespace", "string"),
            ("path", "string"),
            ("processes", "list"),
            ("title", "string"),
            ("altTitle", "string"),
            ("date", "list"),
            ("description", "string"),
            ("label", "string"),
            ("notes", "list"),
        )
    )
]

## Basic Setup

In [None]:
# If using the configuration form, get the values from the form.
# `config` is only defined when the configuration form cell has been run; when
# it is missing, or no datatype was selected, the legacy cell's values are kept.
try:
    if config.values['datatype']:
        namespace = "we1sv1.1"
        action = config.values['action'].upper()
        datatype = config.values['datatype']
        title = config.values['title']
        altTitle = config.values['altTitle']
        date = config.values['date']
        description = config.values['description']
        label = config.values['label']
        notes = config.values['notes']
        if datatype == 'RawData':
            # NOTE(review): `collection` is only read for RawData, yet `path`
            # is built from it for both datatypes -- confirm this is intended.
            collection = config.values['collection']
            relationships = config.values['relationships']
            rights = config.values['rights']
            OCR = config.values['OCR']
        else:
            processes = config.values['processes']
except NameError:
    # The configuration form was not used; fall back to the legacy cell.
    pass
except (AttributeError, LookupError, TypeError) as err:
    # The form exists but a value is missing or malformed: say so rather than
    # silently continuing with a half-applied configuration.
    print('Warning: The configuration form values could not be read completely: ' + repr(err))

# Import dependencies

import pymongo
from pymongo import MongoClient
from jsonschema import validate, FormatChecker

# Connect to the local MongoDB server, select the `we1s` database, and keep a
# handle on its `Corpus` collection for the API methods.

client = MongoClient('mongodb://localhost:27017')
db = client['we1s']
Corpus = db['Corpus']

# Derive the Data Node's `_id` (its datatype) and its comma-delimited `path`
_id = datatype
path = ','.join(['', 'Corpus', collection, ''])

# Expose the notebook's global namespace as the options mapping
opts = globals()

## API Methods

In [None]:
def display_node(_id, path):
    """
    Prints the Data Node manifest matching `_id` and `path`.

    Parameters:
    - _id: the `_id` value of the Data Node to look up
    - path: the `path` value of the Data Node to look up

    Prints a notice when no matching document is found in `Corpus`, and an
    error message when the query itself fails.
    """
    try:
        # Query once with both fields in the filter; a second positional
        # argument to `find_one` would be taken as a projection, not a filter.
        node = Corpus.find_one({'_id': _id, 'path': path})
    except Exception as err:
        print('Error: The data node could not be retrieved: ' + str(err))
        return
    if node is None:
        print('The data node does not exist in the specified collection.')
    else:
        print(node)


def delete_data_node(_id, path):
    """
    Deletes a Data Node based on the user configuration.

    Parameters:
    - _id: the `_id` value of the Data Node to delete
    - path: the `path` value of the Data Node to delete

    Prints the outcome: deleted, not found, or an error message when the
    delete operation fails.
    """
    try:
        result = Corpus.delete_one({'_id': _id, 'path': path})
    except Exception as err:
        print('Unknown Error: The ' + datatype + ' manifest could not be deleted from the ' + collection + ' collection.')
        print(err)
        return
    # `deleted_count` is 0 when no document matched the filter
    if result.deleted_count:
        print('A ' + datatype + ' manifest has been deleted from the ' + collection + ' collection.')
    else:
        print('The data node does not exist in the specified collection.')
        
        
def get_properties(opts, schema):
    """
    Returns a dict of properties from the configured values.

    Parameters:
    - opts: mapping of configuration names to values
    - schema: list of dicts, each with a `name` and a `type` key

    Each schema entry found in `opts` is type-checked with
    `validate_datatype` and copied into the result unless it is empty.
    Entries missing from `opts` are skipped.
    """
    properties = {}
    for item in schema:
        key = item['name']
        # Properties that were never configured are simply left out
        if key not in opts:
            continue
        val = opts[key]
        validate_datatype(key, val, item['type'])
        # Eliminate empty properties from the manifest
        if val is None:
            continue
        if isinstance(val, str):
            if val == "":
                continue
        elif isinstance(val, list):
            # An empty list, or one whose first entry is blank, counts as empty
            if not val or val[0] == "":
                continue
        # Everything else (including booleans such as OCR) is kept as-is
        properties[key] = val
    return properties


def insert_data_node(_id, opts, schema):
    """
    Inserts a Data Node based on the user configuration.

    Parameters:
    - _id: the `_id` value to give the new Data Node
    - opts: mapping of configuration names to values
    - schema: list of property definitions used to build the manifest

    Nothing is inserted when a node with the same `_id` and `path` already
    exists or when the manifest fails validation; the outcome is printed.
    """
    try:
        existing = Corpus.find_one({'_id': _id, 'path': path})
    except Exception as err:
        print('Error: The collection could not be queried: ' + str(err))
        return
    if existing is not None:
        print('The Data Node already exists in the specified collection. If you wish to update it, set `action` to "UPDATE".')
        return

    manifest = get_properties(opts, schema)
    # `_id` is not part of the schema list, so add it explicitly; the lookups
    # in this notebook all filter on it.
    manifest['_id'] = _id
    if not validate_manifest(manifest):
        print("Error: Could not produce a valid manifest.")
        return
    try:
        Corpus.insert_one(manifest)
    except Exception as err:
        print('Error: The ' + datatype + ' manifest could not be inserted in the ' + collection + ' collection: ' + str(err))
        return
    print('A ' + datatype + ' manifest has been inserted in the ' + collection + ' collection.')


def update_data_node(_id, opts, schema):
    """
    Updates a Data Node manifest based on the user configuration.

    Parameters:
    - _id: the `_id` value of the Data Node to update
    - opts: mapping of configuration names to values
    - schema: list of property definitions used to build the update

    Prints a notice when the node does not exist, and an error message when
    the update itself fails, so the two cases are not confused.
    """
    query = {'_id': _id, 'path': path}
    try:
        if Corpus.find_one(query) is None:
            print('The Data Node does not exist in the specified collection.')
            return
        properties = get_properties(opts, schema)
        # The identifying fields are used for the lookup, never rewritten
        properties.pop('_id', None)
        properties.pop('path', None)
        Corpus.update_one(query, {"$set": properties}, upsert=False)
    except Exception as err:
        print('Error: The Data Node could not be updated: ' + str(err))
        return
    print('The manifest for `_id` '+ _id + ' has been updated.')

        
def validate_datatype(key, val, datatype):
    """
    Checks that a configured value has the type named in the schema.

    Parameters:
    - key: name of the property being checked (used in the message)
    - val: the configured value
    - datatype: type name from the schema, e.g. "string", "list" or "bool"

    Returns True when the value matches. Otherwise prints an error message
    and returns False; an unrecognised type name is treated as a mismatch.
    """
    # Fixed lookup table instead of `eval`, so schema text is never executed
    known_types = {
        'str': str,
        'list': list,
        'bool': bool,
        'int': int,
        'float': float,
        'dict': dict,
    }
    datatype = datatype.replace('string', 'str')
    expected = known_types.get(datatype)
    if expected is not None and isinstance(val, expected):
        return True
    msg = 'Error: The value "' + str(val) + '" is of the wrong data type for "' + key + '". '
    msg += 'Change the value to a ' + datatype + '.'
    print(msg)
    return False

        
def validate_manifest(manifest):
    """
    Validates a manifest against the online manifest schema.

    Parameters:
    - manifest: dict of manifest properties to validate

    Downloads the JSON schema for the configured `datatype` and validates
    the manifest against it. Returns True when the manifest is valid.
    Returns False, after printing the reason, when the schema cannot be
    loaded or the manifest does not conform.
    """
    # Imported locally: the notebook's import cell does not provide these
    import json
    from urllib.request import urlopen

    schema_file = 'https://raw.githubusercontent.com/whatevery1says/manifest/master/schema/Corpus/' + datatype + '.json'
    try:
        with urlopen(schema_file, timeout=30) as response:
            schema = json.loads(response.read().decode('utf-8'))
    except (OSError, ValueError) as err:
        # OSError covers network failures; ValueError covers bad JSON/encoding
        print('Error: The manifest schema could not be loaded from ' + schema_file + ': ' + str(err))
        return False
    try:
        validate(manifest, schema, format_checker=FormatChecker())
        return True
    except Exception as err:
        print('Error: The manifest failed validation: ' + str(err))
        return False

## Execute Action

In [None]:
# Run the API method that matches the configured `action`
if action == 'INSERT':
    insert_data_node(_id, opts, schema)
elif action == 'UPDATE':
    update_data_node(_id, opts, schema)
elif action == 'DISPLAY':
    display_node(_id, path)
elif action == 'DELETE':
    delete_data_node(_id, path)
else:
    print('Please configure the `action` variable at the top of the notebook.')