In [6]:
from database.db_manager import DatabaseManager
import os
import sys
from pathlib import Path


In [7]:
# Print the distinct `establishment_id` values stored in the
# `ls_unified_reviews` collection.
db_manager = DatabaseManager()

# Load the MongoDB connection string from the local token file.
# `mongodb_connection` and `db_manager` are reused by later cells.
with open('tokens/mongodb_connection.txt', 'r') as f:
    mongodb_connection = f.read().strip()

# Connect
db_manager.connect(mongodb_connection)

try:
    # Get distinct establishment IDs
    distinct_ids = list(db_manager.db.ls_unified_reviews.distinct("establishment_id"))

    print(f"Found {len(distinct_ids)} distinct establishments:")
    for establishment_id in distinct_ids:
        print(establishment_id)
finally:
    # Release the connection even when the query or the printing fails, so a
    # failed run does not leave an open connection behind in the kernel.
    db_manager.close_connection()

INFO:database.db_manager:Successfully connected to MongoDB database: review_scraper
INFO:database.db_manager:Database connection closed


Found 40 distinct establishments:
687a51385c7e5bb6b9c1a5d6
687a51385c7e5bb6b9c1a5d7
687a86fa9df984d21a14752f
687a86fa9df984d21a147530
687a86fa9df984d21a147531
687a86fa9df984d21a147532
687a86fa9df984d21a147533
687a86fa9df984d21a147534
687a86fa9df984d21a147535
687a86fa9df984d21a147536
687a86fa9df984d21a147537
687a86fa9df984d21a147538
68822a391955e8dc333619c9
68822a391955e8dc333619ca
68822a391955e8dc333619cb
68822a391955e8dc333619cc
68822a391955e8dc333619cd
68822a391955e8dc333619ce
68822a391955e8dc333619cf
68822a391955e8dc333619d0
68822a391955e8dc333619d1
68822a391955e8dc333619d2
68822a391955e8dc333619d3
68822a391955e8dc333619d4
68822a391955e8dc333619d5
68822a391955e8dc333619d6
68822a391955e8dc333619d7
68822a3a1955e8dc333619d8
68822a3a1955e8dc333619d9
68822a3a1955e8dc333619da
68822a3a1955e8dc333619db
68822a3a1955e8dc333619dc
68822a3a1955e8dc333619dd
68822a3a1955e8dc333619de
68822a3a1955e8dc333619df
68822a3a1955e8dc333619e0
68822a3a1955e8dc333619e1
68822a3a1955e8dc333619e2
68822a3a1955e8dc

In [13]:
# Fetch every review document of one establishment and report how many exist.
# NOTE: relies on `db_manager` and `mongodb_connection` created by an earlier
# cell; run that cell first on a fresh kernel.

# Connect
db_manager.connect(mongodb_connection)

try:
    # Get reviews for specific establishment
    establishment_id = "68822a391955e8dc333619d0"
    reviews = list(db_manager.db.ls_unified_reviews.find({"establishment_id": establishment_id}))

    print(f"Found {len(reviews)} reviews for establishment {establishment_id}")
finally:
    # Always close the connection, even if the query raises.
    db_manager.close_connection()

INFO:database.db_manager:Successfully connected to MongoDB database: review_scraper
INFO:database.db_manager:Database connection closed


Found 256 reviews for establishment 68822a391955e8dc333619d0


In [None]:
#!/usr/bin/env python3
"""
Simple Establishment Analysis
Shows establishment_id, display_name, total_reviews, total_responses

Reads the `establishments` and `ls_unified_reviews` collections of the
`review_scraper` database, prints one summary row per establishment and
writes the same table to `establishment_analysis.csv`.
"""

import pandas as pd
from pymongo import MongoClient

# Column order of the summary table; also used when there are no results so
# the printed frame and the CSV still carry a header.
SUMMARY_COLUMNS = ['establishment_id', 'display_name', 'total_reviews', 'total_responses']

# Load MongoDB connection
with open('tokens/mongodb_connection.txt', 'r') as f:
    connection_string = f.read().strip()

# Aggregate reviews and responses by establishment
pipeline = [
    {
        '$group': {
            '_id': '$establishment_id',
            'total_reviews': {'$sum': 1},
            'total_responses': {
                '$sum': {
                    '$cond': [
                        # A review counts as answered only when the owner text
                        # is present and non-empty. `$ifNull` maps both null
                        # and *missing* fields to '' first: a plain
                        # `$ne: [field, None]` is true for a missing field and
                        # would count unanswered reviews as responses.
                        {'$ne': [{'$ifNull': ['$response_from_owner_text', '']}, '']},
                        1,
                        0
                    ]
                }
            }
        }
    },
    # Largest establishments first.
    {'$sort': {'total_reviews': -1}}
]

# Connect to database
client = MongoClient(connection_string)
try:
    db = client["review_scraper"]

    # Get establishment names, keyed by the string form of `_id`.
    # Establishments without a display name fall back to 'Unknown' instead of
    # aborting the whole analysis with a KeyError.
    establishments = {
        str(doc['_id']): doc.get('display_name') or 'Unknown'
        for doc in db.establishments.find({}, {'_id': 1, 'display_name': 1})
    }

    # Run aggregation
    results = list(db.ls_unified_reviews.aggregate(pipeline))
finally:
    # Close connection even if one of the queries above raises.
    client.close()

# Create DataFrame
data = [
    {
        'establishment_id': result['_id'],
        # str() makes the lookup work whether reviews store the id as a
        # string or as an ObjectId.
        'display_name': establishments.get(str(result['_id']), 'Unknown'),
        'total_reviews': result['total_reviews'],
        'total_responses': result['total_responses']
    }
    for result in results
]

df = pd.DataFrame(data, columns=SUMMARY_COLUMNS)
print(df.to_string(index=False))

# Save DataFrame to CSV
df.to_csv('establishment_analysis.csv', index=False)

        establishment_id                                                      display_name  total_reviews  total_responses
68822a3a1955e8dc333619dd                         Dr. Serkan Aygın - Hair Transplant Clinic           2666             2065
68822a391955e8dc333619d2                                           Smile - Hair Transplant           2189             1763
68822a3a1955e8dc333619e0                                     SULE Clinic - Hair Transplant           1591              663
687a86fa9df984d21a147535                                         Hermest - Hair Transplant            837              285
68822a391955e8dc333619cb                                            VatanMed - Hair Clinic            726              196
68822a3a1955e8dc333619e4                    Swedish - Hair Transplant and Aesthetic Clinic            725              725
687a86fa9df984d21a147538                               Haircenter - Hair Transplant Center            637              582
68822a391955e8dc

: 