In [None]:
from pymongo import MongoClient
from core.custom_logger import CustomLogger
import sys
import os

# Put the parent of this file's directory on sys.path so the `properties`
# package imported on the next line can be resolved.
# NOTE(review): `core.custom_logger` is imported above, before this path tweak
# runs — confirm that import does not also depend on it.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from properties.mango import MONGODB_URI, DB_NAME, COLLECTION_NAME

# Module-wide logger obtained from CustomLogger under the name
# "search_index_manager".
logger = CustomLogger().get_logger("search_index_manager")


class SearchIndexManager:
    """Create, inspect, replace and delete search indexes on one collection.

    The manager opens its own ``MongoClient`` for ``MONGODB_URI`` and works on
    the collection ``COLLECTION_NAME`` inside database ``DB_NAME``.

    Error-handling convention: the index operations catch every
    ``Exception`` and log it instead of raising.
    ``check_search_index_exists`` returns a plain ``bool``; the other
    operations return a ``(success, payload)`` tuple where ``payload`` is the
    result on success and the error text on failure.
    """

    # Index name shared by the default definition and the default lookup.
    DEFAULT_INDEX_NAME = "vector_search_index"

    # Document fields that receive a vector mapping in the default definition.
    DEFAULT_VECTOR_FIELDS = (
        "combined_resume_vector",
        "skills_vector",
        "experience_text_vector",
        "academic_details_vector",
    )

    def __init__(self):
        """Connect to MongoDB and resolve the target database and collection."""
        self.client = MongoClient(MONGODB_URI)
        self.db = self.client[DB_NAME]
        self.collection = self.db[COLLECTION_NAME]

    def close(self):
        """Close the underlying ``MongoClient`` held by this manager."""
        self.client.close()

    def check_search_index_exists(self, index_name=DEFAULT_INDEX_NAME):
        """Return True when a search index called ``index_name`` exists.

        Any error raised while listing the indexes is logged and reported as
        ``False``, so callers cannot tell "missing" apart from "lookup failed".
        """
        try:
            found = any(
                index.get("name") == index_name
                for index in self.collection.list_search_indexes()
            )
        except Exception as e:
            logger.error(f"Error checking search index: {str(e)}")
            return False

        if found:
            logger.info(f"Search index '{index_name}' already exists")
        else:
            logger.info(f"Search index '{index_name}' does not exist")
        return found

    def get_default_search_index_definition(self, dimensions=384, similarity="cosine"):
        """Build the default index definition document.

        Args:
            dimensions: Vector length applied to every default vector field.
            similarity: Similarity function applied to every default vector
                field.

        Returns:
            A new dict with ``name`` and ``definition`` keys; each vector
            field gets its own ``knnVector`` mapping dict, so callers may
            mutate the result freely.
        """
        vector_fields = {
            field_name: {
                "type": "knnVector",
                "dimensions": dimensions,
                "similarity": similarity,
            }
            for field_name in self.DEFAULT_VECTOR_FIELDS
        }
        return {
            "name": self.DEFAULT_INDEX_NAME,
            "definition": {
                "mappings": {
                    "dynamic": False,
                    "fields": vector_fields,
                }
            },
        }

    def create_search_index(self, index_definition=None):
        """Create a search index on the managed collection.

        Args:
            index_definition: Index document passed as the single entry of
                the ``createSearchIndexes`` command; the default definition
                is used when omitted.

        Returns:
            ``(True, command_result)`` on success, ``(False, error_text)``
            on failure.
        """
        try:
            if index_definition is None:
                index_definition = self.get_default_search_index_definition()

            command = {
                # Target the collection this manager is actually bound to.
                "createSearchIndexes": self.collection.name,
                "indexes": [index_definition],
            }

            result = self.db.command(command)
            logger.info(f"Search index created successfully: {result}")
            return True, result
        except Exception as e:
            logger.error(f"Failed to create search index: {str(e)}")
            return False, str(e)

    def delete_search_index(self, index_name):
        """Drop the search index called ``index_name``.

        Returns:
            ``(True, command_result)`` on success, ``(False, error_text)``
            on failure.
        """
        try:
            command = {"dropSearchIndex": self.collection.name, "index": index_name}
            result = self.db.command(command)
            logger.info(f"Search index '{index_name}' deleted successfully: {result}")
            return True, result
        except Exception as e:
            logger.error(f"Failed to delete search index '{index_name}': {str(e)}")
            return False, str(e)

    def list_search_indexes(self):
        """List every search index on the managed collection.

        Returns:
            ``(True, list_of_index_documents)`` on success,
            ``(False, error_text)`` on failure.
        """
        try:
            indexes = list(self.collection.list_search_indexes())
            logger.info(f"Found {len(indexes)} search indexes")
            return True, indexes
        except Exception as e:
            logger.error(f"Failed to list search indexes: {str(e)}")
            return False, str(e)

    def update_search_index(self, index_name, new_definition):
        """Replace an index by dropping ``index_name`` and creating ``new_definition``.

        The two steps are not atomic: if creation fails after the drop
        succeeded, the old index is gone and no replacement exists.

        NOTE(review): the create command is issued immediately after the drop
        command returns — confirm the server has finished removing the old
        index by then when both share the same name.

        Returns:
            ``(True, "Index updated successfully")`` on success,
            ``(False, error_text)`` on failure of either step.
        """
        try:
            # First delete the existing index
            delete_success, delete_result = self.delete_search_index(index_name)
            if not delete_success:
                return False, f"Failed to delete existing index: {delete_result}"

            # Then create the new index
            create_success, create_result = self.create_search_index(new_definition)
            if not create_success:
                return False, f"Failed to create new index: {create_result}"

            logger.info(f"Search index '{index_name}' updated successfully")
            return True, "Index updated successfully"
        except Exception as e:
            logger.error(f"Failed to update search index '{index_name}': {str(e)}")
            return False, str(e)


def initialize_database():
    """Ensure the collection and its default search index exist.

    Creates ``COLLECTION_NAME`` in ``DB_NAME`` when it is missing, then
    creates the default search index unless one is already present.  Both
    MongoDB clients opened here (the local one and the one owned by the
    temporary ``SearchIndexManager``) are closed before returning.

    Returns:
        True when the database is ready, False when any step failed.  Errors
        are logged rather than raised.
    """
    client = None
    index_manager = None
    try:
        # Connect to MongoDB
        client = MongoClient(MONGODB_URI)
        db = client[DB_NAME]

        # Create collection if it doesn't exist
        if COLLECTION_NAME not in db.list_collection_names():
            db.create_collection(COLLECTION_NAME)
            # Round-trip a throwaway document through the new collection.
            db[COLLECTION_NAME].insert_one({"_id": "dummy", "initialization": True})
            logger.info(f"Created collection {COLLECTION_NAME} in database {DB_NAME}")

            # Remove dummy document
            db[COLLECTION_NAME].delete_one({"_id": "dummy"})

        # Initialize search index manager
        index_manager = SearchIndexManager()

        if index_manager.check_search_index_exists():
            logger.info("Search index already exists, skipping creation")
            return True

        success, result = index_manager.create_search_index()
        if not success:
            logger.error(
                f"Failed to create search index during initialization: {result}"
            )
            return False

        logger.info("Search index created during initialization")
        return True

    except Exception as e:
        logger.error(f"Failed to initialize database: {str(e)}")
        return False

    finally:
        # Neither client outlives this call, so release their connections.
        for mongo_client in (client, getattr(index_manager, "client", None)):
            if mongo_client is not None:
                mongo_client.close()


if __name__ == "__main__":
    # Script entry point: run the initialization once and report the outcome.
    if initialize_database():
        print("Database initialized successfully")
    else:
        print("Database initialization failed")


In [None]:
# resume_api/main.py
from contextlib import asynccontextmanager
from fastapi import FastAPI
from apis.add_userdata import router as add_urer_data
from fastapi.middleware.cors import CORSMiddleware
from masking.routes import router as masking_router
from GroqcloudLLM.routes import router as groqcloud_router
from core.custom_logger import CustomLogger
from core.config import config

# from apis.healthcheck import router as health_router

# Import main functions from separate module
from main_functions import (
    initialize_application_startup,
    handle_application_shutdown
)

# Logger for this module, obtained from CustomLogger under the name "main".
logger_manager = CustomLogger()
logger = logger_manager.get_logger("main")

# Global search index manager; starts as None and is assigned by lifespan()
# during application startup.
search_index_manager = None


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run application startup before serving and shutdown afterwards.

    Startup awaits ``initialize_application_startup()`` and stores the
    returned manager in the module-level ``search_index_manager``.  An
    exception during startup is logged and re-raised.  The shutdown hook runs
    in a ``finally`` so it is executed even when the application context
    exits with an error.
    """
    global search_index_manager

    # Startup
    try:
        search_index_manager, startup_success = await initialize_application_startup()
    except Exception as e:
        logger.error(f"Application startup failed: {str(e)}")
        # Bare raise re-raises the active exception unchanged.
        raise

    if not startup_success:
        # NOTE(review): the flag was previously discarded; surfaced here as a
        # warning only — confirm whether a falsy value should abort startup.
        logger.warning("Application startup reported failure; continuing to serve")

    try:
        yield
    finally:
        # Shutdown
        handle_application_shutdown()


# Application object; startup and shutdown are delegated to lifespan().
app = FastAPI(
    title="Resume API",
    description="API for managing resume data with vector search",
    lifespan=lifespan,
)

# CORS: every origin, method and header is currently accepted.
# NOTE(review): wildcard origins together with allow_credentials=True is very
# permissive — restrict allow_origins before deploying to production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # For production, replace with specific origins
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Include routers
# The health router stays disabled because its import is commented out above.
# app.include_router(health_router)
app.include_router(masking_router, prefix="/masking", tags=["Masking"])
app.include_router(groqcloud_router, prefix="/groqcloud", tags=["GroqcloudLLM"])
# Router imported from apis.add_userdata; mounted without a prefix.
app.include_router(add_urer_data)


@app.get("/", tags=["Root"])
async def read_root():
    """Return the static welcome payload served at the API root."""
    payload = dict(
        message="Welcome to the Resume API! Use the endpoints to manage resumes and perform searches."
    )
    return payload


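# NOTE: the health check endpoint for verifying vector search status is not
# defined in this file; the `apis.healthcheck` router import and registration
# above are commented out.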


if __name__ == "__main__":
    # Development entry point: serve "main:app" on port 8000 with auto-reload.
    from uvicorn import run as run_server

    run_server("main:app", port=8000, reload=True)
    # Alternative kept for reference (all interfaces, no reload):
    # uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=False)


In [None]:
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Initialize the database on startup and log on shutdown.

    Startup calls ``initialize_database()`` and then stores a fresh
    ``SearchIndexManager`` in the module-level ``search_index_manager``.

    Raises:
        RuntimeError: when ``initialize_database()`` reports failure.  Any
            other startup exception is logged and re-raised unchanged.
    """
    # Startup
    global search_index_manager
    logger.info("Starting up FastAPI application...")

    try:
        # Creates the collection and default search index if needed.
        if not initialize_database():
            logger.error("Failed to initialize database during startup")
            raise RuntimeError("Database initialization failed")

        search_index_manager = SearchIndexManager()
        logger.info("Search index manager initialized successfully")

    except Exception as e:
        logger.error(f"Startup failed: {str(e)}")
        # Bare raise re-raises the active exception unchanged.
        raise

    try:
        yield
    finally:
        # Shutdown: logged even when the application context exits with an error.
        logger.info("Shutting down FastAPI application...")


In [None]:
@app.on_event("startup")
async def startup_event():
    """Check MongoDB connectivity and make sure the expected indexes exist.

    Each index is created only when it is missing, and every creation failure
    is logged as a warning instead of being raised, so the application keeps
    starting even if some indexes could not be built.  An existing text index
    that lacks any of the required fields is dropped and recreated.

    The ``(type, value)`` index on the skills/titles collection is created
    directly as a unique index.  Creating a non-unique index on the same keys
    first would make the later unique request conflict with it, so the unique
    constraint would never be applied.

    NOTE(review): FastAPI documents ``on_event`` as deprecated in favour of
    lifespan handlers; consider migrating when this module is next reworked.
    """
    try:
        logger.info("Starting application startup...")

        # Test database connection first; a failure lands in the outer handler.
        collection.database.client.admin.command("ping")
        logger.info("Connected to MongoDB successfully!")

        skills_titles_collection.database.client.admin.command("ping")
        logger.info("Connected to Skills & Titles collection successfully!")

        # Snapshot existing indexes once so the checks below need no round trips.
        existing_indexes = list(collection.list_indexes())
        existing_index_names = {idx["name"] for idx in existing_indexes}

        # Single source of truth for the full-text index: used both to validate
        # an existing text index and to build a new one.
        text_index_fields = [
            "name", "skills", "education.institution", "education.degree",
            "experience.company", "experience.title", "projects.name",
            "projects.description", "contact_details.name", "may_also_known_skills",
            "labels", "academic_details.education", "academic_details.college",
            "combined_resume",
        ]

        # Find the existing text index, if any, and drop it when fields are missing.
        text_index = next(
            (idx for idx in existing_indexes if idx.get("key", {}).get("_fts") == "text"),
            None,
        )
        text_index_exists = text_index is not None
        if text_index is not None:
            weights = text_index.get("weights", {})
            missing_fields = [field for field in text_index_fields if field not in weights]
            if missing_fields:
                logger.info(f"Dropping existing text index to recreate with new fields: {missing_fields}")
                collection.drop_index(text_index["name"])
                text_index_exists = False

        # Create regular indexes (only if they don't exist)
        indexes_to_create = [
            "name", "skills", "education.institution", "education.degree",
            "experience.company", "experience.title", "total_experience",
            "projects.name", "projects.technologies", "projects.role",
            "contact_details.address", "user_id", "username",
            "contact_details.name", "contact_details.email", "contact_details.phone",
            "contact_details.current_city", "contact_details.pan_card",
            "contact_details.aadhar_card", "may_also_known_skills", "labels",
            "academic_details.education", "academic_details.college",
            "academic_details.pass_year", "current_salary", "expected_salary",
            "notice_period", "source", "created_at", "is_tier1_mba",
            "is_tier1_engineering",
        ]

        for index_field in indexes_to_create:
            if f"{index_field}_1" in existing_index_names:
                continue
            try:
                collection.create_index(index_field)
                logger.info(f"Created index: {index_field}")
            except Exception as e:
                logger.warning(f"Failed to create index {index_field}: {str(e)}")

        # Create compound indexes (check if they exist first)
        compound_indexes = [
            [("contact_details.pan_card", 1), ("contact_details.email", 1)],
            [("skills", 1), ("total_experience", 1)],
            [("contact_details.current_city", 1), ("expected_salary", 1)],
            [("is_tier1_mba", 1), ("is_tier1_engineering", 1)],
        ]

        for index_spec in compound_indexes:
            # Default index name: "<field>_<direction>" parts joined by "_".
            expected_name = "_".join(f"{field}_{direction}" for field, direction in index_spec)
            if expected_name in existing_index_names:
                continue
            try:
                collection.create_index(index_spec)
                logger.info(f"Created compound index: {expected_name}")
            except Exception as e:
                logger.warning(f"Failed to create compound index {expected_name}: {str(e)}")

        # Create text index for full-text search (only if it doesn't exist or was dropped)
        if not text_index_exists:
            try:
                collection.create_index([(field, "text") for field in text_index_fields])
                logger.info("Created text index for full-text search")
            except Exception as e:
                logger.warning(f"Failed to create text index: {str(e)}")

        # Handle skills_titles_collection indexes
        skills_existing_indexes = list(skills_titles_collection.list_indexes())
        skills_indexes_by_name = {idx["name"]: idx for idx in skills_existing_indexes}

        for index_field in ("type", "value"):
            if f"{index_field}_1" in skills_indexes_by_name:
                continue
            try:
                skills_titles_collection.create_index(index_field)
                logger.info(f"Created skills index: {index_field}")
            except Exception as e:
                logger.warning(f"Failed to create skills index {index_field}: {str(e)}")

        # Create text index for skills_titles_collection
        skills_text_exists = any(
            idx.get("key", {}).get("_fts") == "text" for idx in skills_existing_indexes
        )
        if not skills_text_exists:
            try:
                skills_titles_collection.create_index([("value", "text")])
                logger.info("Created skills text index")
            except Exception as e:
                logger.warning(f"Failed to create skills text index: {str(e)}")

        # Unique compound index on (type, value) to prevent duplicates.
        existing_compound = skills_indexes_by_name.get("type_1_value_1")
        if existing_compound is None:
            try:
                skills_titles_collection.create_index([("type", 1), ("value", 1)], unique=True)
                logger.info("Created unique compound index for skills_titles_collection")
            except Exception as e:
                logger.warning(f"Failed to create unique compound index: {str(e)}")
        elif not existing_compound.get("unique"):
            # Not dropped automatically: rebuilding could fail on duplicate data
            # and leave the collection without this index.
            logger.warning(
                "Index type_1_value_1 exists without a unique constraint; "
                "drop it manually to let the unique index be created"
            )

        # Create vector search index (with better error handling)
        try:
            create_vector_search_index(collection)
            if verify_vector_search_index(collection):
                logger.info("Vector search index is ready")
            else:
                logger.warning("Vector search index verification failed - Atlas Search may not be enabled")
        except Exception as e:
            logger.warning(f"Vector search index creation failed: {str(e)}")

        logger.info("Application startup completed successfully!")

    except Exception as e:
        logger.error(f"Failed during startup: {e}")
        # Don't raise the exception to allow the app to start even if some indexes fail
        logger.warning("Some indexes may not have been created, but the application will continue to run")

In [None]:
# resume_api/main.py
from fastapi import FastAPI
from apis import *
from fastapi.middleware.cors import CORSMiddleware
from masking.routes import router as masking_router
from GroqcloudLLM.routes import router as groqcloud_router
from mangodatabase.search_indexes import (
    create_vector_search_index,
    verify_vector_search_index,
)
from mangodatabase.client import get_collection, get_skills_titles_collection
from core.custom_logger import CustomLogger
from apis.healthcheck import router as health_router

# Logger for this module, obtained from CustomLogger under the name "main".
logger_manager = CustomLogger()
logger = logger_manager.get_logger("main")

# Application object.
app = FastAPI(
    title="Resume API", description="API for managing resume data with vector search"
)
# CORS: every origin, method and header is currently accepted.
# NOTE(review): wildcard origins together with allow_credentials=True is very
# permissive — restrict allow_origins before deploying to production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # For production, replace with specific origins
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Module-level collection handles used by the startup handler below.
collection = get_collection()
skills_titles_collection = get_skills_titles_collection()

# Create vector search index.
# NOTE(review): this runs at import time, and startup_event() calls
# create_vector_search_index(collection) again — confirm both are needed.
create_vector_search_index(collection)

# Include routers
app.include_router(health_router)  # health check routes from apis.healthcheck
app.include_router(masking_router, prefix="/masking", tags=["Masking"])
app.include_router(groqcloud_router, prefix="/groqcloud", tags=["GroqcloudLLM"])

# Routers below are currently disabled.
# app.include_router(add_urer_data.router)
# app.include_router(autocomplete_job_title_skills.router)
# app.include_router(autocomplete_city.router)
# app.include_router(manual_resume_search.router)
# app.include_router(skills_experince_title.router)
# app.include_router(ai_resume_search.enhanced_search_router)

@app.get("/", tags=["Root"])
async def read_root():
    """Return the static welcome payload served at the API root."""
    payload = dict(
        message="Welcome to the Resume API! Use the endpoints to manage resumes and perform searches."
    )
    return payload


@app.on_event("startup")
async def startup_event():
    """Create the collection indexes, then verify connectivity and vector search.

    Index creation on both collections is not guarded: an error there
    propagates out of the handler.  The ``(type, value)`` index on the
    skills/titles collection is created only as a unique index, because
    requesting a non-unique and a unique index on the same keys makes the
    second request fail.  Connection and vector-index problems are logged
    separately and do not raise.
    """
    # Regular single-field indexes for the main collection.
    single_field_indexes = [
        "name", "skills", "education.institution", "education.degree",
        "experience.company", "experience.title", "total_experience",
        "projects.name", "projects.technologies", "projects.role",
        "contact_details.address",
        # User data structure
        "user_id", "username",
        "contact_details.name", "contact_details.email", "contact_details.phone",
        "contact_details.current_city", "contact_details.pan_card",
        "contact_details.aadhar_card", "may_also_known_skills", "labels",
        "academic_details.education", "academic_details.college",
        "academic_details.pass_year", "current_salary", "expected_salary",
        "notice_period", "source", "created_at", "is_tier1_mba",
        "is_tier1_engineering",
    ]
    for index_field in single_field_indexes:
        collection.create_index(index_field)

    # Compound indexes for better query performance.
    compound_indexes = [
        [("contact_details.pan_card", 1), ("contact_details.email", 1)],
        [("skills", 1), ("total_experience", 1)],
        [("contact_details.current_city", 1), ("expected_salary", 1)],
        [("is_tier1_mba", 1), ("is_tier1_engineering", 1)],
    ]
    for index_spec in compound_indexes:
        collection.create_index(index_spec)

    # Text index for full-text search.
    text_index_fields = [
        "name", "skills", "education.institution", "education.degree",
        "experience.company", "experience.title", "projects.name",
        "projects.description", "contact_details.name", "may_also_known_skills",
        "labels", "academic_details.education", "academic_details.college",
        "combined_resume",
    ]
    collection.create_index([(field, "text") for field in text_index_fields])

    # Indexes for skills_titles_collection.
    skills_titles_collection.create_index("type")
    skills_titles_collection.create_index("value")

    # Text index on skills_titles_collection for autocomplete functionality.
    skills_titles_collection.create_index([("value", "text")])

    # Unique compound index to prevent duplicates.  An index already stored
    # under this name without the unique flag is left in place and reported,
    # since asking for the unique variant would fail against it.
    existing_compound = skills_titles_collection.index_information().get("type_1_value_1")
    if existing_compound is None or existing_compound.get("unique"):
        skills_titles_collection.create_index([("type", 1), ("value", 1)], unique=True)
    else:
        logger.warning(
            "Index type_1_value_1 exists without a unique constraint; "
            "drop it manually to let the unique index be created"
        )

    # Test database connection
    try:
        collection.database.client.admin.command("ping")
        logger.info("Connected to MongoDB successfully!")

        # Test skills_titles_collection connection
        skills_titles_collection.database.client.admin.command("ping")
        logger.info("Connected to Skills & Titles collection successfully!")
    except Exception as e:
        logger.error(f"Failed to connect to MongoDB: {e}")
        # As before, skip the vector index when the server is unreachable.
        return

    # Reported separately so an index problem is not logged as a connection failure.
    try:
        create_vector_search_index(collection)
        if not verify_vector_search_index(collection):
            raise Exception("Index verification failed")
    except Exception as e:
        logger.error(f"Vector search index setup failed: {e}")


if __name__ == "__main__":
    # Development entry point: serve "main:app" on port 8000 with auto-reload.
    from uvicorn import run as run_server

    run_server("main:app", port=8000, reload=True)
    # Alternative kept for reference (all interfaces, no reload):
    # uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=False)


In [None]:
@app.on_event("startup")
async def startup_event():
    """Check MongoDB connectivity and make sure the expected indexes exist.

    Each index is created only when it is missing, and every creation failure
    is logged as a warning instead of being raised, so the application keeps
    starting even if some indexes could not be built.

    The ``(type, value)`` index on the skills/titles collection is created
    directly as a unique index.  Creating a non-unique index on the same keys
    first would make the later unique request conflict with it, so the unique
    constraint would never be applied.

    NOTE(review): FastAPI documents ``on_event`` as deprecated in favour of
    lifespan handlers; consider migrating when this module is next reworked.
    """
    try:
        logger.info("Starting application startup...")

        # Test database connection first; a failure lands in the outer handler.
        collection.database.client.admin.command("ping")
        logger.info("Connected to MongoDB successfully!")

        skills_titles_collection.database.client.admin.command("ping")
        logger.info("Connected to Skills & Titles collection successfully!")

        # Snapshot existing indexes once so the checks below need no round trips.
        existing_indexes = list(collection.list_indexes())
        existing_index_names = {idx["name"] for idx in existing_indexes}

        # Create regular indexes based on actual ResumeData schema
        indexes_to_create = [
            "user_id", "username", "total_experience", "notice_period",
            "currency", "pay_duration", "current_salary", "hike", "expected_salary",
            "skills", "may_also_known_skills", "labels", "source",
            "last_working_day", "is_tier1_mba", "is_tier1_engineering",
            "comment", "exit_reason",
            # Contact details fields
            "contact_details.name", "contact_details.email", "contact_details.phone",
            "contact_details.alternative_phone", "contact_details.current_city",
            "contact_details.looking_for_jobs_in", "contact_details.gender",
            "contact_details.age", "contact_details.naukri_profile",
            "contact_details.linkedin_profile", "contact_details.portfolio_link",
            "contact_details.pan_card", "contact_details.aadhar_card",
            # Experience fields
            "experience.company", "experience.title", "experience.from_date",
            "experience.to",
            # Education fields
            "academic_details.education", "academic_details.college",
            "academic_details.pass_year",
        ]

        for index_field in indexes_to_create:
            if f"{index_field}_1" in existing_index_names:
                continue
            try:
                collection.create_index(index_field)
                logger.info(f"Created index: {index_field}")
            except Exception as e:
                logger.warning(f"Failed to create index {index_field}: {str(e)}")

        # Create compound indexes for better query performance
        compound_indexes = [
            # Contact and identification
            [("contact_details.pan_card", 1), ("contact_details.email", 1)],
            [("user_id", 1), ("username", 1)],
            # Skills and experience combinations
            [("skills", 1), ("total_experience", 1)],
            [("may_also_known_skills", 1), ("total_experience", 1)],
            [("experience.title", 1), ("total_experience", 1)],
            [("may_also_known_skills", 1), ("experience.title", 1)],
            [("may_also_known_skills", 1), ("experience.title", 1), ("total_experience", 1)],
            # Location and salary
            [("contact_details.current_city", 1), ("expected_salary", 1)],
            [("contact_details.current_city", 1), ("current_salary", 1)],
            [("contact_details.looking_for_jobs_in", 1), ("expected_salary", 1)],
            # Education tier indicators
            [("is_tier1_mba", 1), ("is_tier1_engineering", 1)],
            # Notice period and availability
            [("notice_period", 1), ("last_working_day", 1)],
            # Salary range queries
            [("current_salary", 1), ("expected_salary", 1)],
        ]

        for index_spec in compound_indexes:
            # Default index name: "<field>_<direction>" parts joined by "_".
            expected_name = "_".join(f"{field}_{direction}" for field, direction in index_spec)
            if expected_name in existing_index_names:
                continue
            try:
                collection.create_index(index_spec)
                logger.info(f"Created compound index: {expected_name}")
            except Exception as e:
                logger.warning(f"Failed to create compound index {expected_name}: {str(e)}")

        # Create text index for full-text search based on actual schema
        text_index_fields = [
            "contact_details.name", "skills", "may_also_known_skills", "labels",
            "experience.company", "experience.title",
            "academic_details.education", "academic_details.college",
            "comment", "exit_reason",
        ]
        text_index_exists = any(
            idx.get("key", {}).get("_fts") == "text" for idx in existing_indexes
        )
        if not text_index_exists:
            try:
                collection.create_index([(field, "text") for field in text_index_fields])
                logger.info("Created text index for full-text search")
            except Exception as e:
                logger.warning(f"Failed to create text index: {str(e)}")

        # Handle skills_titles_collection indexes
        skills_existing_indexes = list(skills_titles_collection.list_indexes())
        skills_indexes_by_name = {idx["name"]: idx for idx in skills_existing_indexes}

        for index_field in ("type", "value"):
            if f"{index_field}_1" in skills_indexes_by_name:
                continue
            try:
                skills_titles_collection.create_index(index_field)
                logger.info(f"Created skills index: {index_field}")
            except Exception as e:
                logger.warning(f"Failed to create skills index {index_field}: {str(e)}")

        # Create text index for skills_titles_collection
        skills_text_exists = any(
            idx.get("key", {}).get("_fts") == "text" for idx in skills_existing_indexes
        )
        if not skills_text_exists:
            try:
                skills_titles_collection.create_index([("value", "text")])
                logger.info("Created skills text index")
            except Exception as e:
                logger.warning(f"Failed to create skills text index: {str(e)}")

        # Unique compound index on (type, value) to prevent duplicates.
        existing_compound = skills_indexes_by_name.get("type_1_value_1")
        if existing_compound is None:
            try:
                skills_titles_collection.create_index([("type", 1), ("value", 1)], unique=True)
                logger.info("Created unique compound index for skills_titles_collection")
            except Exception as e:
                logger.warning(f"Failed to create unique compound index: {str(e)}")
        elif not existing_compound.get("unique"):
            # Not dropped automatically: rebuilding could fail on duplicate data
            # and leave the collection without this index.
            logger.warning(
                "Index type_1_value_1 exists without a unique constraint; "
                "drop it manually to let the unique index be created"
            )

        # Create vector search index
        try:
            create_vector_search_index(collection)
            if verify_vector_search_index(collection):
                logger.info("Vector search index is ready")
            else:
                logger.warning("Vector search index verification failed")
        except Exception as e:
            logger.warning(f"Vector search index creation failed: {str(e)}")

        logger.info("Application startup completed successfully!")

    except Exception as e:
        logger.error(f"Failed during startup: {e}")
        # Startup deliberately continues even when index setup failed.
        logger.warning("Some indexes may not have been created, but the application will continue to run")

In [None]:
# resume_api/main.py
from fastapi import FastAPI
from apis import *
from fastapi.middleware.cors import CORSMiddleware
from masking.routes import router as masking_router
from GroqcloudLLM.routes import router as groqcloud_router
from mangodatabase.search_indexes import (
    create_vector_search_index,
    verify_vector_search_index,
)
from mangodatabase.client import get_collection, get_skills_titles_collection
from core.custom_logger import CustomLogger
from apis.healthcheck import router as health_router

# Module-level application wiring. Everything below runs once, at import time.

# Logger named "main", obtained from the project's CustomLogger wrapper.
logger_manager = CustomLogger()
logger = logger_manager.get_logger("main")

app = FastAPI(
    title="Resume API", description="API for managing resume data with vector search"
)
# NOTE(review): wildcard origins/methods/headers are combined with
# allow_credentials=True. FastAPI's CORS documentation states these must be
# listed explicitly when credentials are allowed — confirm the intended
# origins before deploying.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # For production, replace with specific origins
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Collection handles shared by the startup hook below: the main resume
# collection and the skills/titles collection.
collection = get_collection()
skills_titles_collection = get_skills_titles_collection()

# Create the vector search index as soon as the module is imported.
# NOTE(review): this call is not wrapped in try/except, so an exception here
# would abort the import; startup_event() calls the same function again inside
# a guarded block — confirm whether this import-time call is still needed.
create_vector_search_index(collection)

# Register the routers that are currently enabled.
app.include_router(health_router)  # Health-check endpoints (no prefix)
app.include_router(masking_router, prefix="/masking", tags=["Masking"])
app.include_router(groqcloud_router, prefix="/groqcloud", tags=["GroqcloudLLM"])

# Routers below are currently disabled; they are kept for reference only.
# app.include_router(add_urer_data.router)
# app.include_router(autocomplete_job_title_skills.router)
# app.include_router(autocomplete_city.router)
# app.include_router(manual_resume_search.router)
# app.include_router(skills_experince_title.router)
# app.include_router(ai_resume_search.enhanced_search_router)


@app.get("/", tags=["Root"])
async def read_root():
    """Serve the static welcome payload at the API root.

    Returns:
        A dict with a single ``"message"`` key holding the welcome text.
    """
    welcome_text = "Welcome to the Resume API! Use the endpoints to manage resumes and perform searches."
    return {"message": welcome_text}


# Single-field ascending indexes for the resume collection.
_RESUME_FIELD_INDEXES = [
    "user_id",
    "username",
    "total_experience",
    "notice_period",
    "currency",
    "pay_duration",
    "current_salary",
    "hike",
    "expected_salary",
    "skills",
    "may_also_known_skills",
    "labels",
    "source",
    "last_working_day",
    "is_tier1_mba",
    "is_tier1_engineering",
    "comment",
    "exit_reason",
    # Contact details fields
    "contact_details.name",
    "contact_details.email",
    "contact_details.phone",
    "contact_details.alternative_phone",
    "contact_details.current_city",
    "contact_details.looking_for_jobs_in",
    "contact_details.gender",
    "contact_details.age",
    "contact_details.naukri_profile",
    "contact_details.linkedin_profile",
    "contact_details.portfolio_link",
    "contact_details.pan_card",
    "contact_details.aadhar_card",
    # Experience fields
    "experience.company",
    "experience.title",
    "experience.from_date",
    "experience.to",
    # Education fields
    "academic_details.education",
    "academic_details.college",
    "academic_details.pass_year",
]

# Compound indexes for the resume collection; names are derived from the keys.
# NOTE(review): some entries pair fields that look array-valued (for example
# may_also_known_skills with experience.title). MongoDB rejects documents that
# would need a compound index over two arrays — confirm against the schema.
_RESUME_COMPOUND_INDEXES = [
    # Contact and identification
    [("contact_details.pan_card", 1), ("contact_details.email", 1)],
    [("user_id", 1), ("username", 1)],
    # Skills and experience combinations
    [("skills", 1), ("total_experience", 1)],
    [("may_also_known_skills", 1), ("total_experience", 1)],
    [("experience.title", 1), ("total_experience", 1)],
    [("may_also_known_skills", 1), ("experience.title", 1)],
    [
        ("may_also_known_skills", 1),
        ("experience.title", 1),
        ("total_experience", 1),
    ],
    # Location and salary
    [("contact_details.current_city", 1), ("expected_salary", 1)],
    [("contact_details.current_city", 1), ("current_salary", 1)],
    [("contact_details.looking_for_jobs_in", 1), ("expected_salary", 1)],
    # Education tier indicators
    [("is_tier1_mba", 1), ("is_tier1_engineering", 1)],
    # Notice period and availability
    [("notice_period", 1), ("last_working_day", 1)],
    # Salary range queries
    [("current_salary", 1), ("expected_salary", 1)],
]

# Fields covered by the resume collection's full-text index.
_RESUME_TEXT_INDEX_KEYS = [
    ("contact_details.name", "text"),
    ("skills", "text"),
    ("may_also_known_skills", "text"),
    ("labels", "text"),
    ("experience.company", "text"),
    ("experience.title", "text"),
    ("academic_details.education", "text"),
    ("academic_details.college", "text"),
    ("comment", "text"),
    ("exit_reason", "text"),
]

# Key spec shared by the unique and fallback indexes on the skills collection.
_SKILLS_COMPOUND_KEYS = [("type", 1), ("value", 1)]


def _index_name(keys):
    """Return the default MongoDB index name for a list of (field, direction) pairs."""
    return "_".join(f"{field}_{direction}" for field, direction in keys)


def _has_text_index(indexes):
    """Return True if any index document in *indexes* is a text index."""
    return any(idx.get("key", {}).get("_fts") == "text" for idx in indexes)


def _create_index_if_missing(target, keys, existing_names, label, display=None):
    """Create the index described by *keys* on *target* unless its name already exists.

    Failures are logged as warnings and never raised, so one bad index does
    not stop the remaining ones from being created.
    """
    name = _index_name(keys)
    if name in existing_names:
        return
    shown = name if display is None else display
    try:
        target.create_index(keys)
        logger.info(f"Created {label}: {shown}")
    except Exception as e:
        logger.warning(f"Failed to create {label} {shown}: {e}")


def _ensure_skills_compound_index(skills_collection, existing_indexes):
    """Ensure a (type, value) index exists on the skills collection, unique if possible.

    The unique index is attempted first. Creating the non-unique index first
    would occupy the default name ``type_1_value_1`` and make every later
    unique creation fail with an options conflict.
    """
    name = _index_name(_SKILLS_COMPOUND_KEYS)
    current = next((idx for idx in existing_indexes if idx.get("name") == name), None)
    if current is not None:
        if current.get("unique"):
            logger.info("Unique compound index for skills_titles_collection already exists")
        else:
            # Never drop an index automatically at startup; leave that to an operator.
            logger.warning(
                f"Index {name} exists on skills_titles_collection but is not unique; "
                "drop it manually to let the unique index be created"
            )
        return

    try:
        skills_collection.create_index(_SKILLS_COMPOUND_KEYS, unique=True)
        logger.info("Created unique compound index for skills_titles_collection")
        return
    except Exception as e:
        logger.warning(
            f"Failed to create unique compound index, falling back to non-unique: {e}"
        )

    # Fallback keeps lookups indexed even when the unique index cannot be built.
    try:
        skills_collection.create_index(_SKILLS_COMPOUND_KEYS)
        logger.info("Created skills compound index: type_1_value_1")
    except Exception as e:
        logger.warning(f"Failed to create skills compound index: {e}")


@app.on_event("startup")
async def startup_event():
    """Verify MongoDB connectivity and create any missing indexes at startup.

    Pings both collections' clients, then creates the single-field, compound
    and text indexes for the resume collection, the indexes for the
    skills/titles collection, and finally the vector search index.

    Every failure is logged and swallowed: the application keeps starting
    even if some indexes could not be created.
    """
    try:
        logger.info("Starting application startup...")

        # Test database connection first
        collection.database.client.admin.command("ping")
        logger.info("Connected to MongoDB successfully!")

        skills_titles_collection.database.client.admin.command("ping")
        logger.info("Connected to Skills & Titles collection successfully!")

        # Snapshot existing indexes once so creation calls can be skipped.
        existing_indexes = list(collection.list_indexes())
        existing_index_names = {idx["name"] for idx in existing_indexes}

        for field in _RESUME_FIELD_INDEXES:
            _create_index_if_missing(
                collection, [(field, 1)], existing_index_names, "index", display=field
            )

        for keys in _RESUME_COMPOUND_INDEXES:
            _create_index_if_missing(
                collection, keys, existing_index_names, "compound index"
            )

        # Only attempt the text index when no text index is present yet.
        if not _has_text_index(existing_indexes):
            try:
                collection.create_index(_RESUME_TEXT_INDEX_KEYS)
                logger.info("Created text index for full-text search")
            except Exception as e:
                logger.warning(f"Failed to create text index: {e}")

        # Indexes for skills_titles_collection
        skills_existing_indexes = list(skills_titles_collection.list_indexes())
        skills_existing_names = {idx["name"] for idx in skills_existing_indexes}

        for field in ("type", "value"):
            _create_index_if_missing(
                skills_titles_collection,
                [(field, 1)],
                skills_existing_names,
                "skills index",
                display=field,
            )

        # Unique (type, value) index, with a non-unique fallback.
        _ensure_skills_compound_index(skills_titles_collection, skills_existing_indexes)

        if not _has_text_index(skills_existing_indexes):
            try:
                skills_titles_collection.create_index([("value", "text")])
                logger.info("Created skills text index")
            except Exception as e:
                logger.warning(f"Failed to create skills text index: {e}")

        # Create vector search index
        try:
            create_vector_search_index(collection)
            if verify_vector_search_index(collection):
                logger.info("Vector search index is ready")
            else:
                logger.warning("Vector search index verification failed")
        except Exception as e:
            logger.warning(f"Vector search index creation failed: {e}")

        logger.info("Application startup completed successfully!")

    except Exception as e:
        # Deliberately best-effort: log and let the app start anyway.
        logger.error(f"Failed during startup: {e}")
        logger.warning(
            "Some indexes may not have been created, but the application will continue to run"
        )


if __name__ == "__main__":
    # Local development entry point: serve the app on port 8000 with auto-reload.
    import uvicorn

    dev_server_options = {"port": 8000, "reload": True}
    uvicorn.run("main:app", **dev_server_options)
    # uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=False)
