## Maintenez et documentez un système de stockage des données sécurisé et performant

### Importation des librairies nécessaire

In [32]:
import pandas as pd
from pymongo import MongoClient
import json
import os
from dotenv import load_dotenv
from bson.objectid import ObjectId
import unittest

In [33]:
# Lire les variables d'environnement qui se trouve dans le fichier .env
load_dotenv()

True

Définir les variables

In [39]:
csv_file=os.getenv('DATA')
db_name=os.getenv('DB')
collection_name=os.getenv('COLLECTION')
mongodb_uri=os.getenv('MONGODB_URI')

In [41]:
client = MongoClient(mongodb_uri)
db = client[db_name]
collection = db[collection_name]

### Insertion de données dans mongodb via Pandas

In [5]:
df = pd.read_csv(csv_file)

# Convertir le DataFrame Pandas à  une list de dictionaries
records = json.loads(df.to_json(orient='records'))

# Connecter à MongoDB
client = MongoClient(mongodb_uri)
db = client[db_name]
collection = db[collection_name]

# Insertions des données
result = collection.insert_many(records)

print(f" {len(result.inserted_ids)} documents insérés")

 55500 documents insérés


### CRUD

#### Create

In [42]:
collection.insert_one({
    "Name": "Jean Pierre",
    "Age": 26,
    "Gender": "Male",
    "Blood Type" : "A"
})

InsertOneResult(ObjectId('67387f69dfdb7b48533b98b0'), acknowledged=True)

#### Read

In [43]:
collection.find_one({"Name": "Jean Pierre"})

{'_id': ObjectId('67387f69dfdb7b48533b98b0'),
 'Name': 'Jean Pierre',
 'Age': 26,
 'Gender': 'Male',
 'Blood Type': 'A'}

#### Update

In [44]:
myquery = {"Name":"Jean Pierre"}
newvalues = {"$set": {"Age": 27}}
collection.update_one(myquery, newvalues)

UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)

In [45]:
collection.find_one({"Name": "Jean Pierre"})

{'_id': ObjectId('67387f69dfdb7b48533b98b0'),
 'Name': 'Jean Pierre',
 'Age': 27,
 'Gender': 'Male',
 'Blood Type': 'A'}

#### Delete

In [46]:
myquery = {'_id': ObjectId('6737bd27dfdb7b48533b98aa')}
collection.delete_one(myquery)

DeleteResult({'n': 0, 'ok': 1.0}, acknowledged=True)

In [47]:
result = collection.find_one({"Name": "Jean Pierre"})

### Tester l’intégrité

In [None]:
# colonnes disponibles, 
# types des variables, doublons, valeurs manquantes

In [24]:
collection.count_documents({})

55500

In [26]:
class TestConnection(unittest.TestCase):
    def setUp(self):
            """Configuration initiale avant chaque test"""
            self.uri =os.getenv('MONGODB_URI')
            self.db_name = os.getenv('DB')
            
    def test_real_connection(self):
        """Test d'une vraie connexion à MongoDB"""
        try:
            # Tentative de connexion
            client = MongoClient(self.uri, serverSelectionTimeoutMS=2000)
            # Force une connexion en exécutant une commande
            client.admin.command('ping')
            
            self.assertTrue(True)  # Si on arrive ici, la connexion est réussie
            
        except ConnectionFailure:
            self.fail("La connexion à MongoDB a échoué")
            
        finally:
            if 'client' in locals():
                client.close()
                
    def test_database_creation(self):
        """Test de création d'une base de données"""
        try:
            client = MongoClient(self.uri)
            db = client[self.db_name]
            
            # Création d'une collection pour tester
            collection = db.test_collection
            result = collection.insert_one({"test": "data"})
            
            # Vérification que l'insertion a fonctionné
            self.assertTrue(result.inserted_id is not None)
            
            # Nettoyage
            client.drop_database(self.db_name)
            
        finally:
            if 'client' in locals():
                client.close()


In [27]:
class TestOperations(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        """Configuration initiale pour la classe de test"""
        cls.client = MongoClient("mongodb://localhost:27017")
        cls.db = cls.client.test_database
        cls.collection = cls.db.test_collection

    def setUp(self):
        """Nettoie la collection avant chaque test"""
        self.collection.delete_many({})

    def test_insert_and_find(self):
        """Test d'insertion et de recherche"""
        test_data = {"name": "test", "value": 123}
        result = self.collection.insert_one(test_data)
        self.assertTrue(result.inserted_id is not None)
        found = self.collection.find_one({"name": "test"})
        self.assertIsNotNone(found)
        self.assertEqual(found["value"], 123)

    def test_update(self):
        """Test de mise à jour"""
        initial_data = {"name": "test", "value": 123}
        self.collection.insert_one(initial_data)
        result = self.collection.update_one(
            {"name": "test"},
            {"$set": {"value": 456}}
        )
        self.assertEqual(result.modified_count, 1)
        updated = self.collection.find_one({"name": "test"})
        self.assertEqual(updated["value"], 456)

    @classmethod
    def tearDownClass(cls):
        """Nettoyage après tous les tests"""
        cls.client.drop_database("test_database")
        cls.client.close()

In [49]:
collection.find_one({})

{'_id': ObjectId('6737b864dfdb7b48533abfde'),
 'Name': 'Bobby JacksOn',
 'Age': 30,
 'Gender': 'Male',
 'Blood Type': 'B-',
 'Medical Condition': 'Cancer',
 'Date of Admission': '2024-01-31',
 'Doctor': 'Matthew Smith',
 'Hospital': 'Sons and Miller',
 'Insurance Provider': 'Blue Cross',
 'Billing Amount': 18856.2813059782,
 'Room Number': 328,
 'Admission Type': 'Urgent',
 'Discharge Date': '2024-02-02',
 'Medication': 'Paracetamol',
 'Test Results': 'Normal'}

In [None]:
import unittest
from pymongo import MongoClient
from datetime import datetime
from bson import ObjectId
import re

class TestMongoDBIntegrity(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        """Configuration initiale de la connexion"""
        cls.client = MongoClient("mongodb://localhost:27017")
        cls.db = cls.client.test_database
        
        # Définition du schéma de validation
        cls.user_schema = {
            "$jsonSchema": {
                "bsonType": "object",
                "required": ["Name", 
                             "Age", 
                             "Gender", 
                             "Blood Type", 
                             "Medical Condition", 
                             "Date of Admission",
                             "Doctor",
                             "Hospital",
                             "Insurance Provider",
                             "Billing Amount",
                             "Room Number",
                             "Admission Type",
                             "Discharge Date",
                             "Medication",
                             "Test Results"
                            ],
                "properties": {
                    "Name": {
                        "bsonType": "string",
                        "minLength": 6,
                        "maxLength": 100
                    },
                    "age": {
                        "bsonType": ["int", "null"],
                        "minimum": 0,
                        "maximum": 120
                    },
                    "Gender": {
                        "bsonType": ["string", "null"],
                        "minLength": 1,
                        "maxLength": 10 
                    },
                    "Blood Type": {
                        "bsonType": ["string", "null"],
                        "minLength": 1,
                        "maxLength": 5 
                    },
                    "Medical Condition": {
                        "bsonType": ["string", "null"],
                        "minLength": 3,
                        "maxLength": 50 
                    }, 
                    "Date of Admission": {
                        "bsonType": "date"
                    },
                    "Doctor": {
                        "bsonType": ["string", "null"],
                        "minLength": 3,
                        "maxLength": 50 
                    },
                    "Hospital": {
                        "bsonType": ["string", "null"],
                        "minLength": 3,
                        "maxLength": 50 
                    },
                    "Insurance Provider": {
                        "bsonType": ["string", "null"],
                        "minLength": 3,
                        "maxLength": 50 
                    },
                    "Billing Amount": {
                        "bsonType": ["float", "null"],
                        "minLength": 0
                    },
                    
                    "Room Number": {
                        "bsonType": ["string", "null", 'int']
                    },
                    "Admission Type":{
                        "bsonType": ["string", "null"],
                        "minLength": 3,
                        "maxLength": 50 
                    },
                    "Discharge Date": {
                        "bsonType": ["date", "null"]
                    },
                    "Medication":{
                        "bsonType": ["string", "null"],
                        "minLength": 3,
                        "maxLength": 50 
                    },
                    "Test Results": {
                        "bsonType": ["string", "null"],
                        "minLength": 3,
                        "maxLength": 50 
                    }
                }
            }
        }

    def setUp(self):
        """Préparation avant chaque test"""
        # Création d'une nouvelle collection avec validation
        if "users" in self.db.list_collection_names():
            self.db.users.drop()
        self.db.create_collection("users", validator=self.user_schema)
        self.users = self.db.users

    def test_schema_validation(self):
        """Test de la validation du schéma"""
        # Document valide
        valid_patient = {
            'Name': 'Bobby JacksOn',
            'Age': 30,
            'Gender': 'Male',
            'Blood Type': 'B-',
            'Medical Condition': 'Cancer',
            'Date of Admission': '2024-01-31',
            'Doctor': 'Matthew Smith',
            'Hospital': 'Sons and Miller',
            'Insurance Provider': 'Blue Cross',
            'Billing Amount': 18856.2813059782,
            'Room Number': 328,
            'Admission Type': 'Urgent',
            'Discharge Date': '2024-02-02',
            'Medication': 'Paracetamol',
            'Test Results': 'Normal'
        }
        result = self.users.insert_one(valid_user)
        self.assertTrue(result.inserted_id is not None)
        
        # Document invalide (age incorrect)
        invalid_user = {
            'Name': 'Coco Tello',
            'Age': 150,
            'Gender': 'Female',
            'Blood Type': 'A',
            'Medical Condition': 'Cancer',
            'Date of Admission': '2024-01-31',
            'Doctor': 'Matthew Smith',
            'Hospital': 'Sons and Miller',
            'Insurance Provider': 'Blue Cross',
            'Billing Amount': 18856.2813059782,
            'Room Number': 328,
            'Admission Type': None,
            'Discharge Date': '2024-02-02',
            'Medication': None,
            'Test Results': 'Normal'
        }
        with self.assertRaises(Exception):
            self.users.insert_one(invalid_user)

    def test_referential_integrity(self):
        """Test de l'intégrité référentielle"""
        # Création d'une collection pour les posts
        if "patients" in self.db.list_collection_names():
            self.db.posts.drop()
        self.db.create_collection("patients")
        
        # Création d'un utilisateur
        user = {
            "username": "test_user",
            "email": "test@example.com",
            "created_at": datetime.utcnow()
        }
        user_id = self.users.insert_one(user).inserted_id
        
        # Création d'un post lié à l'utilisateur
        post = {
            "title": "Test Post",
            "user_id": user_id,
            "content": "Test content"
        }
        post_id = self.db.posts.insert_one(post).inserted_id
        
        # Vérification de l'intégrité référentielle
        found_post = self.db.posts.find_one({"_id": post_id})
        found_user = self.users.find_one({"_id": found_post["user_id"]})
        
        self.assertIsNotNone(found_user)
        self.assertEqual(found_user["username"], "test_user")

    def test_data_consistency(self):
        """Test de la cohérence des données"""
        # Insertion de données de test
        test_users = [
            {
                "username": f"user_{i}",
                "email": f"user{i}@example.com",
                "age": 20 + i,
                "created_at": datetime.utcnow()
            }
            for i in range(5)
        ]
        
        self.users.insert_many(test_users)
        
        # Vérification de l'unicité des usernames
        usernames = self.users.distinct("username")
        self.assertEqual(len(usernames), 5)
        
        # Vérification des contraintes d'âge
        invalid_age_count = self.users.count_documents({
            "age": {"$not": {"$gte": 0, "$lte": 120}}
        })
        self.assertEqual(invalid_age_count, 0)

    def test_email_format(self):
        """Test du format des emails"""
        def is_valid_email(email):
            pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
            return bool(re.match(pattern, email))
        
        # Vérification de tous les emails
        all_users = self.users.find({}, {"email": 1})
        for user in all_users:
            self.assertTrue(is_valid_email(user["email"]))

    def test_index_integrity(self):
        """Test de l'intégrité des index"""
        # Création d'index uniques
        self.users.create_index("username", unique=True)
        self.users.create_index("email", unique=True)
        
        # Test de l'unicité
        user1 = {
            "username": "unique_user",
            "email": "unique@example.com",
            "created_at": datetime.utcnow()
        }
        
        self.users.insert_one(user1)
        
        # Tentative d'insertion avec username dupliqué
        user2 = {
            "username": "unique_user",
            "email": "different@example.com",
            "created_at": datetime.utcnow()
        }
        
        with self.assertRaises(Exception):
            self.users.insert_one(user2)

    @classmethod
    def tearDownClass(cls):
        """Nettoyage après tous les tests"""
        cls.db.users.drop()
        cls.db.posts.drop()
        cls.client.close()
