In [None]:
from pymongo import MongoClient, ASCENDING
from pymongo.encryption import (ClientEncryption, AutoEncryptionOpts, Algorithm)
from bson.codec_options import CodecOptions
from bson.binary import STANDARD
from dotenv import load_dotenv
from datetime import datetime, time
import os

In [None]:
# Read the .env file first so the settings below can come from the environment.
load_dotenv()

# --- Notebook settings -------------------------------------------------------
# Connection URI; a missing MONGODB_URI raises KeyError here.
uri = os.environ["MONGODB_URI"]

# Key vault location (database, collection, and the combined "db.collection" namespace).
key_vault_database_name, key_vault_collection_name = "encryption", "__keyVault_customers_ex"
key_vault_namespace = ".".join((key_vault_database_name, key_vault_collection_name))

# Destination collection that will hold the encrypted copy of the data.
encrypted_database_name, encrypted_collection_name = "sample_analytics", "customers_enc_ex"

# Source collection the documents are read from.
source_database_name, source_collection_name = "sample_analytics", "customers"

# KMS provider name used when creating data keys.
kms_provider = "local"

In [None]:
# Create the Customer Master Key (CMK) file, or reuse it if it already exists.
#
# The key is 96 random bytes written to ./customer-master-key.txt.
# Raises Exception (chained to the underlying error) if the file cannot be written.
path = "./customer-master-key.txt"
try:
    # Generate the key material before touching the filesystem so a failure
    # here cannot leave an empty key file behind.
    file_bytes = os.urandom(96)
    # "xb" = exclusive create: the existence check and the creation happen in a
    # single step, so an existing key file is never overwritten.
    with open(path, "xb") as f:
        f.write(file_bytes)
    print("CMK file successfuly created.")
except FileExistsError:
    print("CMK file already exists.")
except Exception as e:
    # Format the cause into the message (passing it as a second positional
    # argument renders the error as a tuple) and keep it as the explicit cause.
    raise Exception(
        f"Unable to write Customer Master Key to file due to the following error: {e}"
    ) from e


In [None]:
# Build the local KMS provider credentials from the CMK file.
#
# Reads ./customer-master-key.txt as raw bytes and exposes it as
# kms_provider_credentials["local"]["key"].
# Raises Exception (chained to the underlying error) if the file cannot be read,
# and ValueError if the key does not have the expected length.
path = "./customer-master-key.txt"
try:
    with open(path, "rb") as f:
        local_master_key = f.read()
except Exception as e:
    # Format the cause into the message (passing it as a second positional
    # argument renders the error as a tuple) and keep it as the explicit cause.
    raise Exception(
        f"Unable to read Customer Master Key from file due to the following error: {e}"
    ) from e

# The key file is created with exactly 96 bytes, which is the size pymongo
# documents for a "local" master key. Fail here with a clear message instead
# of later, when the encrypted client is built.
if len(local_master_key) != 96:
    raise ValueError(
        f"Customer Master Key in {path} must be 96 bytes, got {len(local_master_key)}"
    )

kms_provider_credentials = {
    "local": {
        "key": local_master_key
    },
}

In [None]:
# Build the MongoDB client configured with auto-encryption options.
auto_encryption_options = AutoEncryptionOpts(
    kms_provider_credentials,
    key_vault_namespace,
    bypass_query_analysis=True,
    # crypt_shared_lib_path=os.environ['SHARED_LIB_PATH']
    # cryptSharedLibRequired
)
encrypted_client = MongoClient(uri, auto_encryption_opts=auto_encryption_options)

# Reset state from previous runs: drop the key vault and the encrypted collection.
key_vault_collection = encrypted_client[key_vault_database_name][key_vault_collection_name]
# TODO this does not delete enxcol_ collections
key_vault_collection.drop()
encrypted_client[encrypted_database_name][encrypted_collection_name].drop()

# Unique index on keyAltNames, applied only to documents that have the field.
key_vault_collection.create_index(
    [("keyAltNames", ASCENDING)],
    unique=True,
    partialFilterExpression={"keyAltNames": {"$exists": True}},
)

# Explicit-encryption helper bound to the same key vault and client.
client_encryption = ClientEncryption(
    kms_provider_credentials,
    key_vault_namespace,
    encrypted_client,
    CodecOptions(uuid_representation=STANDARD),
)

# One data key per encrypted field, created in order with alt names
# "dataKey1" .. "dataKey6".
(
    data_key_id_1,
    data_key_id_2,
    data_key_id_3,
    data_key_id_4,
    data_key_id_5,
    data_key_id_6,
) = [
    client_encryption.create_data_key(kms_provider, key_alt_names=[f"dataKey{key_number}"])
    for key_number in range(1, 7)
]



In [None]:
# Define the encrypted fields mapping.
# One row per encrypted field:
#   (data key id, document path, BSON type, supports equality queries?)
field_specs = [
    (data_key_id_1, "name", "string", True),
    (data_key_id_6, "birthdate", "date", True),
    (data_key_id_2, "active", "bool", True),
    (data_key_id_3, "accounts", "array", False),
    (data_key_id_4, "address", "string", False),
    (data_key_id_5, "email", "string", True),
]

encrypted_fields_map = {
    "fields": [
        {
            "keyId": key_id,
            "path": field_path,
            "bsonType": bson_type,
            # Only queryable fields get a "queries" entry; each one gets its
            # own list object.
            **({"queries": [{"queryType": "equality"}]} if is_queryable else {}),
        }
        for key_id, field_path, bson_type, is_queryable in field_specs
    ]
}

In [None]:
# Create the encrypted collection described by encrypted_fields_map.
#
# Uses the client_encryption instance built when the data keys were created;
# it is configured with the same credentials, key vault namespace, client and
# codec options, so a second instance is not needed.
# Raises Exception (chained to the underlying error) if creation fails.
try:
    client_encryption.create_encrypted_collection(
        encrypted_client[encrypted_database_name],
        encrypted_collection_name,
        encrypted_fields_map,
        kms_provider,  # same provider setting used for create_data_key
        {},
    )
except Exception as e:
    # Format the cause into the message (passing it as a second positional
    # argument renders the error as a tuple) and keep it as the explicit cause.
    raise Exception(
        f"Unable to create encrypted collection due to the following error: {e}"
    ) from e

In [None]:
# Copy every document from the source collection into the encrypted collection,
# explicitly encrypting the protected fields on the way.
encrypted_collection = encrypted_client[encrypted_database_name][encrypted_collection_name]
source_collection = encrypted_client[source_database_name][source_collection_name]


def encrypt_indexed(value, key_id):
    """Encrypt `value` with the INDEXED algorithm and contention factor 1."""
    return client_encryption.encrypt(value, Algorithm.INDEXED, key_id, contention_factor=1)


def encrypt_unindexed(value, key_id):
    """Encrypt `value` with the UNINDEXED algorithm."""
    return client_encryption.encrypt(value, Algorithm.UNINDEXED, key_id)


for doc in source_collection.find():
    doc["name"] = encrypt_indexed(doc["name"], data_key_id_1)
    # "active" is the only field treated as optional; the others must be present.
    if "active" in doc:
        doc["active"] = encrypt_indexed(doc["active"], data_key_id_2)
    doc["accounts"] = encrypt_unindexed(doc["accounts"], data_key_id_3)
    doc["address"] = encrypt_unindexed(doc["address"], data_key_id_4)
    doc["email"] = encrypt_indexed(doc["email"], data_key_id_5)
    # Keep only the calendar date: the time part is reset to 00:00 before encrypting.
    birthdate_at_midnight = datetime.combine(doc["birthdate"].date(), time(0))
    doc["birthdate"] = encrypt_indexed(birthdate_at_midnight, data_key_id_6)
    result = encrypted_collection.insert_one(doc)