# Install Azure CosmosDB Python API Package

In [None]:
!pip install azure-cosmos

# Install the MongoDB Python Driver (pymongo) Used for AWS DocumentDB

In [None]:
!pip install pymongo

# Import Python Libraries for Azure CosmosDB and AWS DocumentDB

In [65]:
from azure.cosmos import exceptions, CosmosClient, PartitionKey
import os
import pymongo
import sys
import json
import tqdm
import time

# Set Azure CosmosDB Endpoint URL & Key

In [3]:
# Read the Cosmos DB endpoint and account key from the environment so that the
# secret key is never stored in the notebook. Any key that was previously
# committed in this cell should be considered leaked and be rotated.
#   COSMOS_ENDPOINT - account URL (optional, defaults to the sample account)
#   COSMOS_KEY      - account key (required)
url = os.environ.get("COSMOS_ENDPOINT", "https://azureadmin.documents.azure.com:443/")
key = os.environ.get("COSMOS_KEY")
if not key:
    raise RuntimeError(
        "Set the COSMOS_KEY environment variable to the Azure Cosmos DB account key "
        "before running this cell."
    )
client = CosmosClient(url, key)

# List Azure CosmosDB Databases

In [42]:
def list_databases(client):
    """Print a 'Databases:' header followed by the id of each database.

    Args:
        client: object exposing ``list_databases()``, whose entries are
            mappings with an ``'id'`` key.
    """
    print('Databases:')
    # Materialise the listing first so nothing is printed if iteration fails.
    for db_properties in list(client.list_databases()):
        print(db_properties['id'])

In [43]:
# Print the ids of all databases returned by the client's listing.
list_databases(client)

Databases:
SampleDB
Tasks
AzureSampleFamilyDatabase
ToDoList


In [22]:
def find_database(client, id):
    """Report whether a database with the given id exists.

    Runs a parameterised query through ``client.query_databases`` and prints
    whether at least one database matched. Nothing is returned.

    Args:
        client: object exposing ``query_databases(query)``.
        id: database id to look for (passed as the ``@id`` query parameter).
    """
    print('1. Query for Database')

    query_spec = {
        "query": "SELECT * FROM r WHERE r.id=@id",
        "parameters": [
            {"name": "@id", "value": id},
        ],
    }
    matches = list(client.query_databases(query_spec))

    message = (
        "Database with id '{0}' was found"
        if matches
        else "No database with id '{0}' was found"
    )
    print(message.format(id))

In [23]:
# Check whether a database with id "SampleDB" exists and print the result.
find_database(client,"SampleDB")

1. Query for Database
Database with id 'SampleDB' was found


# List Azure CosmosDB Database Containers

In [17]:
def list_containers(client,database_name):
    """Print one 'Container ID: <id>' line per container in a database.

    Args:
        client: object exposing ``get_database_client(name)``.
        database_name: name passed to ``get_database_client``.
    """
    database_proxy = client.get_database_client(database_name)
    container_listing = database_proxy.list_containers()
    for properties in container_listing:
        container_id = properties['id']
        print("Container ID: {}".format(container_id))

In [21]:
# Print the id of every container in the "AzureSampleFamilyDatabase" database.
list_containers(client,"AzureSampleFamilyDatabase")

Container ID: products
Container ID: FamilyContainer
Container ID: customers


# Create Sample Data for Azure CosmosDB

In [84]:
# Seed Cosmos DB with a sample database, three containers and nine product
# items so that the later cells have something to list and migrate.
database_name = "AzureSampleFamilyDatabase"

# The *_if_not_exists calls return a proxy whether or not the resource already
# exists, replacing the repeated try/except CosmosResourceExistsError blocks.
database = client.create_database_if_not_exists(id=database_name)

# Container partitioned on /lastName.
family_container = database.create_container_if_not_exists(
    id="FamilyContainer", partition_key=PartitionKey(path="/lastName")
)

# Container partitioned on /productName; this is the one populated below.
container_name = "products"
container = database.create_container_if_not_exists(
    id=container_name, partition_key=PartitionKey(path="/productName")
)

# Container with custom settings: partitioned on /city and created with a
# default time-to-live of 200 (seconds, per the SDK documentation - confirm).
customer_container_name = "customers"
customer_container = database.create_container_if_not_exists(
    id=customer_container_name,
    partition_key=PartitionKey(path="/city"),
    default_ttl=200,
)

# Use a separate loop variable so the `container` client above is not
# overwritten by the property dicts that list_containers() yields.
for container_properties in database.list_containers():
    print("Container ID: {}".format(container_properties['id']))

# Insert (or overwrite) items item1..item9 in the products container.
for i in range(1, 10):
    container.upsert_item(
        dict(id="item{}".format(i), productName="Widget", productModel="Model {}".format(i))
    )

# Modify an existing item: read it back by id and partition key value
# ("Widget" is its productName), change one field and upsert it again.
item = container.read_item("item2", partition_key="Widget")
item["productModel"] = "DISCONTINUED"
updated_item = container.upsert_item(item)

Container ID: products
Container ID: FamilyContainer
Container ID: customers


# Set AWS DocumentDB Connection String

In [31]:
# Read the DocumentDB connection string from the environment so that the user
# name and password are never stored in the notebook. Credentials that were
# previously committed in this cell should be considered leaked and be rotated.
# Expected shape of DOCUMENTDB_URI:
#   mongodb://<user>:<password>@<cluster-endpoint>:27017/?replicaSet=rs0&readPreference=secondaryPreferred&retryWrites=false
documentdb_uri = os.environ.get("DOCUMENTDB_URI")
if not documentdb_uri:
    raise RuntimeError(
        "Set the DOCUMENTDB_URI environment variable to the AWS DocumentDB "
        "connection string before running this cell."
    )
DocumentDB_client = pymongo.MongoClient(documentdb_uri)

# Target database that receives the migrated collections.
document_db = DocumentDB_client["sample_database"]

# The target collection is chosen per container in the migration cell.



# Migrate from Azure CosmosDB Container to AWS DocumentDB Collection 

In [83]:
from tqdm import tqdm
from time import sleep

# Pause after every insert (presumably to throttle writes to DocumentDB).
INSERT_DELAY_SECONDS = 0.01

# Copy every container of the Cosmos DB database into a DocumentDB collection
# with the same name.
# NOTE: documents are written with insert_one and no explicit _id, so running
# this cell again inserts every document a second time.
database_name="AzureSampleFamilyDatabase"
database = client.get_database_client(database_name)
for container_properties in database.list_containers():
    container_name = container_properties['id']
    container = database.get_container_client(container_name)
    document_col = document_db[container_name]

    # "c" is only an alias for the container being queried. Using a fixed
    # alias instead of concatenating the container id keeps the query valid
    # for ids that are not legal identifiers (e.g. containing '-').
    query_items_response = container.query_items(
        query='SELECT * FROM c',
        enable_cross_partition_query=True
    )
    # The query result is consumed lazily; materialise it to know the count.
    items = list(query_items_response)
    print(container_name+': {0} items. '.format(len(items)))

    pbar = tqdm(items)
    for idx, item in enumerate(pbar):
        item_json = json.dumps(item)
        # If Cosmos DB holds data in a format DocumentDB does not accept,
        # rewrite it here on the JSON text, e.g.
        # item_json = item_json.replace("source text", "target text")
        document = json.loads(item_json)
        document_col.insert_one(document)
        sleep(INSERT_DELAY_SECONDS)
        pbar.set_description(f"No.{idx}")
            

products: 0 items. 


0it [00:00, ?it/s]


FamilyContainer: 192 items. 


No.191: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 192/192 [00:03<00:00, 60.69it/s]


customers: 0 items. 


0it [00:00, ?it/s]


# Verify Data in AWS DocumentDB

In [33]:
# Find a document that was previously written. The sample items item1..item9
# live in the "products" container, so query that collection explicitly
# instead of relying on whichever collection the migration loop handled last.
products_col = document_db["products"]
x = products_col.find_one({'id':'item5'})
print(x)

{'_id': ObjectId('63762b83ccc2ba65b0bdcf09'), 'id': 'item5', 'productName': 'Widget', 'productModel': 'Model 5', '_rid': 'kzxCAPpoMygaAAAAAAAAAA==', '_self': 'dbs/kzxCAA==/colls/kzxCAPpoMyg=/docs/kzxCAPpoMygaAAAAAAAAAA==/', '_etag': '"08006655-0000-1900-0000-63762b800000"', '_attachments': 'attachments/', '_ts': 1668688768}
