In [1]:
from pymongo import MongoClient
from pymongo.encryption import (ClientEncryption, AutoEncryptionOpts)
from bson.codec_options import CodecOptions
from bson.binary import STANDARD
from dotenv import load_dotenv
from datetime import datetime, time
import os

In [8]:
# Read the .env file so its entries are available through os.environ.
load_dotenv()

# Connection string for the MongoDB deployment (must be set in the environment).
uri = os.environ['MONGODB_URI']  # Your connection URI

# Key vault location; the "<database>.<collection>" form is what the
# encryption options below take as the key vault namespace.
key_vault_database_name, key_vault_collection_name = "encryption", "__keyVault_customers"
key_vault_namespace = ".".join((key_vault_database_name, key_vault_collection_name))

# Collection the documents are read from ...
source_database_name, source_collection_name = "sample_analytics", "customers"

# ... and the encrypted collection they are copied into.
encrypted_database_name, encrypted_collection_name = "sample_analytics", "customers_enc"

In [3]:
# Create the Customer Master Key (CMK) file on first run and reuse it afterwards.
# The key is 96 random bytes from os.urandom, written to the working directory.
path = "./customer-master-key.txt"
if os.path.exists(path):
    print("CMK file already exists.")
else:
    try:
        file_bytes = os.urandom(96)
        # "xb" is exclusive creation: if the file appeared after the check above,
        # open() fails instead of silently overwriting an existing key.
        with open(path, "xb") as f:
            f.write(file_bytes)
        print("CMK file successfuly created.")
    except Exception as e:
        # Put the cause into the message itself (not into a second positional
        # argument, which renders as a tuple) and chain it for the traceback.
        raise Exception(
            f"Unable to write Customer Master Key to file due to the following error: {e}"
        ) from e


CMK file already exists.


In [4]:
# Build the local KMS provider credentials from the CMK stored on disk.
path = "./customer-master-key.txt"
try:
    with open(path, "rb") as f:
        local_master_key = f.read()
except Exception as e:
    # Put the cause into the message itself (not into a second positional
    # argument, which renders as a tuple) and chain it for the traceback.
    raise Exception(
        f"Unable to read Customer Master Key from file due to the following error: {e}"
    ) from e

# The CMK file is generated as 96 random bytes, which is the key length PyMongo
# documents for the "local" KMS provider. Fail here with a clear message rather
# than later inside the driver if the file is truncated or was replaced.
if len(local_master_key) != 96:
    raise ValueError(
        f"Customer Master Key in {path} must be 96 bytes long, found {len(local_master_key)} bytes."
    )

kms_provider_credentials = {
    "local": {
        "key": local_master_key
    },
}

In [9]:
# Automatic-encryption settings: KMS credentials, key vault namespace, and the
# crypt_shared library, whose path comes from the SHARED_LIB_PATH environment
# variable and which is marked as required.
auto_encryption_options = AutoEncryptionOpts(
    kms_providers=kms_provider_credentials,
    key_vault_namespace=key_vault_namespace,
    crypt_shared_lib_path=os.environ['SHARED_LIB_PATH'],
    crypt_shared_lib_required=True,
)

# Client connected to `uri` with the automatic-encryption settings applied.
encrypted_client = MongoClient(uri, auto_encryption_opts=auto_encryption_options)

In [10]:
# Encrypted-fields definition for the encrypted collection.
# Each spec row is (path, bsonType, queryable). Rows flagged queryable get an
# equality query entry; the others carry only their path and type.
encrypted_fields_map = {
    "fields": [
        {
            "path": field_path,
            "bsonType": bson_type,
            "queries": [{"queryType": "equality"}],
        }
        if queryable
        else {
            "path": field_path,
            "bsonType": bson_type,
        }
        for field_path, bson_type, queryable in (
            ("name", "string", True),
            ("birthdate", "date", True),
            ("active", "bool", True),
            ("accounts", "array", False),
            ("address", "string", False),
            ("email", "string", True),
        )
    ]
}

In [11]:
# Start from a clean slate: drop the key vault collection first, then the
# encrypted collection, both through the encrypted client.
for database_name, collection_name in (
    (key_vault_database_name, key_vault_collection_name),
    (encrypted_database_name, encrypted_collection_name),
):
    encrypted_client[database_name][collection_name].drop()

In [12]:
# Create the encrypted collection.
# The ClientEncryption helper receives the KMS credentials, the key vault
# namespace and the encrypted client (passed as its third argument), with
# UUIDs handled using the STANDARD representation.
client_encryption = ClientEncryption(
    kms_provider_credentials,
    key_vault_namespace,
    encrypted_client,
    codec_options=CodecOptions(uuid_representation=STANDARD),
)
try:
    # Creates `encrypted_collection_name` in the encrypted database from the
    # encrypted-fields definition, using the "local" KMS provider and an empty
    # master-key document.
    client_encryption.create_encrypted_collection(
        encrypted_client[encrypted_database_name],
        encrypted_collection_name,
        encrypted_fields_map,
        "local",
        {},
    )
except Exception as e:
    # Put the cause into the message itself (not into a second positional
    # argument, which renders as a tuple) and chain it for the traceback.
    raise Exception(
        f"Unable to create encrypted collection due to the following error: {e}"
    ) from e

In [13]:
# Copy every document from the source collection into the encrypted collection,
# one insert per document, going through the encrypted client for both handles.
source_collection = encrypted_client[source_database_name][source_collection_name]
encrypted_collection = encrypted_client[encrypted_database_name][encrypted_collection_name]

midnight = time(0)
for doc in source_collection.find():
    # Keep only the calendar day of the birthdate and pin the time to 00:00.
    # NOTE(review): presumably so equality queries on the encrypted date can
    # match by day alone; confirm.
    birth_day = doc['birthdate'].date()
    doc['birthdate'] = datetime.combine(birth_day, midnight)
    result = encrypted_collection.insert_one(doc)
