# 1. Scrape Reviews

In [None]:
python operations_controller.py scrape --excel ../database/establishments.xlsx

# 2. Unify Reviews (Incremental)

In [None]:
# Unify all new reviews (incremental - only processes new reviews)
python operations_controller.py unify

# Unify specific establishments
python operations_controller.py unify --establishments "id1,id2,id3"

# Quick mode (minimal output)
python operations_controller.py unify --quick

## Full Rebuild (When Needed)

If you need to rebuild the entire unified_reviews collection from scratch:

In [None]:
# Option 1: Delete the unified_reviews collection in MongoDB Compass, then run:
python operations_controller.py unify

# Option 2: Drop the collection from the MongoDB shell (if you have access)
# db.unified_reviews.drop()
# Then run: python operations_controller.py unify

# 3. Show Statistics

In [None]:
python operations_controller.py stats

# 4. Scrape and Unify (Combined)

In [None]:
# Full workflow: scrape new establishments then unify
python operations_controller.py scrape-and-unify --excel ../database/establishments.xlsx

# With quick unification (minimal output)
python operations_controller.py scrape-and-unify --excel ../database/establishments.xlsx --quick-unify

# 5. Verbose Mode

In [None]:
# Add -v or --verbose to any command for detailed logging
python operations_controller.py unify --verbose
python operations_controller.py scrape --excel ../database/establishments.xlsx --verbose
python operations_controller.py stats --verbose

# Usage Examples

## Daily Operations

In [None]:
# Quick daily unification (only new reviews)
python operations_controller.py unify --quick

# Check database status
python operations_controller.py stats

# Full scrape and unify for new establishments
python operations_controller.py scrape-and-unify --excel new_establishments.xlsx --quick-unify

## Maintenance Operations

In [None]:
# Check current database status and statistics
python operations_controller.py stats

# Re-process all reviews (after deleting unified_reviews collection)
# First: Delete unified_reviews collection via MongoDB Compass
# Then: python operations_controller.py unify

# Verbose troubleshooting
python operations_controller.py unify --verbose

## Targeted Operations

In [None]:
# Process specific establishments only
python operations_controller.py unify --establishments "687a51385c7e5bb6b9c1a5d6,another_id"

# Re-scrape specific establishments (add them to a new Excel file)
python operations_controller.py scrape --excel specific_establishments.xlsx

## Programmatic Usage

In [None]:
from engine.operations_controller import OperationsController
from database.db_manager import DatabaseManager

# --- Option A: use the operations controller directly ---
controller = OperationsController(verbose=False)
controller.initialize()
try:
    # Unify reviews quietly.
    # NOTE(review): `success` is presumably a bool indicating whether
    # unification completed -- confirm against OperationsController.
    success = controller.unify_reviews(quick=True)

    # Get statistics
    controller.show_statistics()
finally:
    # Always clean up, even if unification or the statistics call raised,
    # so the controller's resources are not leaked.
    controller.cleanup()

# --- Option B: use the database manager directly ---
db_manager = DatabaseManager()
mongodb_connection = "your_connection_string"  # placeholder: replace with your real URI
db_manager.connect(mongodb_connection)
try:
    # Run incremental unification
    results = db_manager.unify_reviews_incremental()
    print(f"Unified: {results}")

    # Get stats
    stats = db_manager.get_unified_reviews_stats()
    print(f"Stats: {stats}")
finally:
    # Close the connection on every exit path, including errors above.
    db_manager.close_connection()

# Common Workflows

## Adding New Establishments

In [None]:
# 1. Add new establishments to Excel file
# 2. Run combined operation
python operations_controller.py scrape-and-unify --excel new_establishments.xlsx

# 3. Check results
python operations_controller.py stats

## Regular Data Updates

In [None]:
# Daily: Unify any new reviews that were scraped
python operations_controller.py unify --quick

# Weekly: Full statistics review
python operations_controller.py stats

# Monthly: Re-scrape existing establishments (use the same Excel file)
python operations_controller.py scrape --excel ../database/establishments.xlsx
python operations_controller.py unify

## Troubleshooting

In [None]:
# Debug with verbose logging
python operations_controller.py unify --verbose

# Check if unification is working properly
python operations_controller.py stats

# Process specific problematic establishments
python operations_controller.py unify --establishments "problematic_id" --verbose

# Full rebuild if needed (after backing up data)
# 1. Delete unified_reviews collection in MongoDB
# 2. python operations_controller.py unify --verbose

# Performance Tips

- **Use `--quick` for daily unification** to reduce output and improve speed
- **Batching is automatic**: The system processes reviews in batches of 1,000 at a time
- **Incremental by default**: Only new reviews are processed, making regular runs fast
- **Use verbose mode only for debugging** as it generates more I/O
- **Monitor with stats**: Regular stats checks help identify issues early
- **Indexes are auto-created**: The system creates optimal indexes automatically