## Configuration

In [None]:
### RUN THIS CELL TO BEGIN CONFIGURATION ###
# Runs the companion notebook `create_collection_config.ipynb` from this cell.
# NOTE(review): presumably that notebook defines the `config` object whose
# `values` are read in the Basic Setup cell -- confirm against that notebook.
%run create_collection_config.ipynb

In [None]:
### LEGACY CONFIGURATION CELL -- SKIP THIS IF USING THE CONFIGURATION FORM ABOVE ###
# The values assigned here are replaced in the Basic Setup cell whenever the
# configuration form supplies a non-empty `_id`.
# Empty strings, and lists whose first item is an empty string, are left out
# of the manifest by `create_manifest()`.

# Choose Action : Create Manifest or Insert Manifest in Database
# 'Create Manifest' only prints the manifest; any other value makes the
# Execute Action cell insert the manifest into the database.

action = 'Create Manifest'

# Required Properties
_id = '' # string: checked for uniqueness in the database before insertion
path = ',Corpus,' # string: do not change
publications = [',Publications,new_york_times,'] # list of strings
description = '' # string
collectors = ['John Smith'] # list of strings
date = ['2017-01-01'] # list of strings

# Optional Properties
workstation = '' # string
queryTerms = ['humanities'] # list of strings
processes = ['path to process'] # list of strings
title = '' # string
altTitle = '' # string
label = '' # string
notes = ['A note']  # list of strings


## Basic Setup

In [None]:
# If using the configuration form, get the values from the form.
# The form is only used when it supplies a non-empty `_id`; otherwise the
# values from the legacy configuration cell are kept.
try:
    if config.values['_id']:
        action = config.values['action']
        _id = config.values['_id']
        path = config.values['path']
        publications = config.values['publications']
        description = config.values['description']
        collectors = config.values['collectors']
        date = config.values['date']
        workstation = config.values['workstation']
        queryTerms = config.values['query_terms']
        processes = config.values['processes']
        # `title` and `altTitle` each read their own form field
        title = config.values['title']
        altTitle = config.values['altTitle']
        label = config.values['label']
        notes = config.values['notes']
except (NameError, AttributeError, KeyError, TypeError):
    # The form was not run (no `config`) or did not provide the expected
    # values, so fall back to the legacy configuration cell
    pass
namespace = 'we1sv1.1'

# Import dependencies

import pymongo, requests, json
from pymongo import MongoClient
from jsonschema import validate, FormatChecker

# Connect to the local MongoDB server and keep handles on the `we1s` database
# and its `Corpus` collection
client = MongoClient('mongodb://localhost:27017')
db = client['we1s']
Corpus = db['Corpus']

# Properties used to create the manifest, each paired with its expected data
# type, listed in the order they are added to the manifest
schema = [
    {"name": prop, "type": kind}
    for prop, kind in (
        ("namespace", "string"),
        ("_id", "string"),
        ("path", "string"),
        ("publications", "list"),
        ("description", "string"),
        ("collectors", "list"),
        ("date", "list"),
        ("workstation", "string"),
        ("queryTerms", "list"),
        ("processes", "list"),
        ("title", "string"),
        ("altTitle", "string"),
        ("label", "string"),
        ("notes", "list"),
    )
]

# Use the notebook's global variables as the configuration options
opts = globals()

## API Methods

In [None]:
def create_manifest(opts, schema):
    """
    Builds a manifest from the configured values and validates it.

    `schema` is a list of dicts, each giving a property `name` and its
    expected `type`. Every property is looked up by name in `opts`, checked
    with `validate_datatype()`, and copied into the manifest unless it is
    empty.

    Returns the manifest dict if `validate_manifest()` accepts it. Otherwise
    prints an error message and returns None.
    """
    manifest = {}
    for item in schema:
        key = item['name']
        val = opts[key]
        validate_datatype(key, val, item['type'])
        # Eliminate empty properties from the manifest. A list is treated as
        # empty when it has no items or when its first item is an empty string
        if isinstance(val, str) and val != "":
            manifest[key] = val
        elif isinstance(val, list) and val and val[0] != "":
            manifest[key] = val
    # Validate the manifest against the schema
    if validate_manifest(manifest):
        return manifest
    # The validator does not report which property failed, so do not single
    # one out in the message
    print('Could not produce a valid manifest. Please check the form of the configured values.')
    return None

def validate_datatype(key, val, datatype):
    """
    Checks that a configured value has the data type named in the schema.

    `key` is the property name (used only in the error message), `val` is the
    value to check, and `datatype` is the type name from the schema, e.g.
    'string' or 'list'.

    Prints an error message if the value has the wrong type or the type name
    is not recognised. Returns True if the value matches, otherwise False.
    """
    # Type names are resolved through a fixed table rather than evaluated as code
    known_types = {
        'str': str,
        'list': list,
        'dict': dict,
        'int': int,
        'float': float,
        'bool': bool,
    }
    datatype = datatype.replace('string', 'str')
    expected = known_types.get(datatype)
    if expected is not None and isinstance(val, expected):
        return True
    msg = 'Error: The value "' + str(val) + '" is of the wrong data type for "' + key + '". '
    msg += 'Change the value to a ' + datatype + '.'
    print(msg)
    return False
        
def insert_collection(manifest):
    """
    Inserts a collection manifest into the `Corpus` collection.

    Uniqueness of `_id` is enforced by MongoDB itself: inserting a document
    whose `_id` already exists raises `DuplicateKeyError`, which is reported
    as an existing `_id`. Other database errors are reported separately
    instead of being mistaken for a duplicate.

    Prints the outcome and returns None. Nothing is inserted if `manifest` is
    empty, None, or has no `_id`.
    """
    from pymongo.errors import DuplicateKeyError, PyMongoError

    if not manifest or '_id' not in manifest:
        print('No valid manifest with an `_id` was supplied, so nothing was inserted.')
        return
    try:
        Corpus.insert_one(manifest)
    except DuplicateKeyError:
        print('The `_id` "' + manifest['_id'] + '" already exists in the database.')
    except PyMongoError as err:
        print('Could not insert the manifest with `_id` "' + manifest['_id'] + '": ' + str(err))
    else:
        print('Inserted manifest with `_id` "' + manifest['_id'] + '".')
        
def validate_manifest(manifest):
    """
    Validates a manifest against the online manifest schema.

    Downloads the collection schema and validates `manifest` against it with
    format checking enabled.

    Returns True if the manifest is valid, otherwise False. Raises a
    `requests` exception if the schema cannot be downloaded, and a JSON
    decoding error if the downloaded text is not valid JSON.
    """
    schema_file = 'https://raw.githubusercontent.com/whatevery1says/manifest/master/schema/Corpus/collection.json'
    # Without a timeout the request can wait indefinitely on a stalled connection
    response = requests.get(schema_file, timeout=30)
    # Fail with a clear HTTP error rather than trying to parse an error page
    response.raise_for_status()
    schema = json.loads(response.text)
    try:
        validate(manifest, schema, format_checker=FormatChecker())
    except Exception:
        # Any failure during validation means the manifest cannot be accepted
        return False
    return True


## Execute Action

In [None]:
# Build the manifest, then either display it or insert it in the database
manifest = create_manifest(opts, schema)
if manifest is None:
    # create_manifest() returns None (after printing an error) when the
    # manifest is not valid, so there is nothing to show or insert
    print('No manifest was produced. Correct the configuration and re-run the Basic Setup and Execute Action cells.')
elif action == 'Create Manifest':
    print('To insert this manifest in the database, select "Insert into Database" in the configuration form, then re-run the Basic Setup and Execute Action cells.\n\n')
    print(json.dumps(manifest, indent=4))
else:
    print('Adding the manifest below to the database...\n')
    print(manifest)
    print('\n')
    insert_collection(manifest)