## Configuration

In [None]:
# Choose the action: 'CREATE' builds the manifest and prints it;
# 'IMPORT' also inserts the manifest into the database.

action = 'CREATE'

# Required Properties
# Each value is checked against the type noted beside it when the manifest is built.
_id = '' # string; checked for uniqueness in the Corpus collection before insertion
path = ',Corpus,' # Do not change
publications = [',Publications,new_york_times,'] # list
description = '' # string
collectors = ['John Smith'] # list
startdate = '2017-01-01' # string; moved into the manifest's `date` field

# Optional Properties
enddate = '' # string; leave empty when there is no end date
workstation = '' # string
queryTerms = ['humanities'] # list
processes = ['path to process'] # list

## Basic Setup

In [None]:
# Import dependencies

import pymongo
from pymongo import MongoClient

# Connect to the local MongoDB server and keep handles on the `we1s`
# database and its `Corpus` collection
client = MongoClient('mongodb://localhost:27017')
db = client.we1s
Corpus = db.Corpus

# Schema used to build the manifest: one {"name", "type"} entry per
# property, in the order the properties are copied into the manifest
schema = [
    {"name": field_name, "type": field_type}
    for field_name, field_type in (
        ("_id", "string"),
        ("path", "string"),
        ("publications", "list"),
        ("description", "string"),
        ("collectors", "list"),
        ("startdate", "string"),
        ("enddate", "string"),
        ("workstation", "string"),
        ("queryTerms", "list"),
        ("processes", "list"),
    )
]

# The configuration values live in the notebook's global namespace;
# expose that namespace as a dict so they can be looked up by name
opts = globals()

## API Methods

In [None]:
def create_manifest(opts, schema):
    """
    Builds a manifest dict from the configured options.

    Every entry in `schema` names a property (`name`) and its expected
    data type (`type`). The property's value is looked up in `opts`,
    passed to `validate_datatype`, and copied into the manifest. The
    finished manifest is handed to `rebuild_date` before it is returned.

    Raises a KeyError if a schema property is missing from `opts`.
    """
    def _checked_pair(field):
        # Look up one property, run the type check, and return (name, value)
        name = field['name']
        value = opts[name]
        validate_datatype(name, value, field['type'])
        return name, value

    manifest = dict(_checked_pair(field) for field in schema)
    rebuild_date(manifest)
    return manifest

def validate_datatype(key, val, datatype):
    """
    Prints an error message if `val` is not of the data type named by `datatype`.

    `key` is the property name used in the message, `val` is the value to
    check, and `datatype` is the schema's type name ("string" is treated
    as `str`). Nothing is returned and nothing is raised; a mismatch, or a
    type name that is not recognised, is only reported on stdout.
    """
    # Explicit lookup table instead of eval(): the schema string is never
    # executed as code, and the check still runs under `python -O`
    # (unlike an assert, which is stripped).
    known_types = {
        'str': str,
        'list': list,
        'dict': dict,
        'int': int,
        'float': float,
        'bool': bool,
        'tuple': tuple,
        'set': set,
    }
    datatype = datatype.replace('string', 'str')
    expected = known_types.get(datatype)
    if expected is not None and isinstance(val, expected):
        return
    msg = 'Error: The value "' + str(val) + '" is of the wrong data type for "' + key + '". '
    msg += 'Change the value to a ' + datatype + '.'
    print(msg)
        
def rebuild_date(manifest):
    """
    Replaces the `startdate` and `enddate` keys with a single `date` list.

    The manifest is modified in place. When an end date is given, `date`
    becomes `[{'start': startdate, 'end': enddate}]`; when the end date is
    empty (or None), `date` becomes `[startdate]`.

    Raises a KeyError if either `startdate` or `enddate` is missing.
    """
    start = manifest.pop('startdate')
    end = manifest.pop('enddate')
    # A truthiness test covers both '' and None. The previous
    # `len(end > 0)` always raised, so the end date was silently dropped.
    if end:
        manifest['date'] = [{'start': start, 'end': end}]
    else:
        manifest['date'] = [start]
    
def insert_collection(manifest):
    """
    Inserts a collection manifest, refusing to overwrite an existing `_id`.

    The insert relies on MongoDB's unique `_id` index instead of reading
    every `_id` first, so the check and the write happen in one step.
    The outcome is printed: success, a duplicate `_id`, or any other
    database error (reported with its own message rather than being
    mislabelled as a duplicate).

    Raises a KeyError if the manifest has no `_id`.
    """
    # Local import so this function does not depend on other cells' imports
    from pymongo.errors import DuplicateKeyError, PyMongoError

    manifest_id = str(manifest['_id'])
    try:
        Corpus.insert_one(manifest)
    except DuplicateKeyError:
        print('The `_id` "' + manifest_id + '" already exists in the database.')
    except PyMongoError as err:
        # Connection failures and similar errors are not duplicates
        print('Error: Could not insert manifest with `_id` "' + manifest_id + '": ' + str(err))
    else:
        print('Inserted manifest with `_id` "' + manifest_id + '".')

## Execute Action

In [None]:
# Build the manifest from the configuration values, then act on it
manifest = create_manifest(opts, schema)
if action == 'CREATE':
    # Preview only: nothing is written to the database
    print(manifest)
elif action == 'IMPORT':
    print('Adding the manifest below to the database...\n')
    print(manifest)
    print('\n')
    insert_collection(manifest)
else:
    # A mistyped action must not fall through to a database write
    print('Error: Unknown action "' + str(action) + '". Set `action` to "CREATE" or "IMPORT".')