In [1]:
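# One-time setup: uncomment the line below to install the dependencies
# into the environment used by this notebook's kernel.
# %pip install -r requirements.txt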

In [2]:
import pandas as pd
import numpy as np
from review_analyzer import ReviewAnalyzer
from datetime import datetime

# Paths to the input workbooks, keyed by the ReviewAnalyzer keyword argument
# each one is passed as.
input_files = {
    "google_maps_file": "reviews/google/allGoogleReviews_2025-04-06.xlsx",
    "trustpilot_file": "reviews/trustpilot/allTrustpilotReviews_2025-04-06.xlsx",
    "establishment_file": "establishments/establishment_base.xlsx",
}

# Create the analyzer that every later cell calls into.
analyzer = ReviewAnalyzer(**input_files)

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\yigit\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\yigit\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\yigit\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\yigit\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Loading data...
Loaded 15262 Google Maps reviews
Loaded 694 Trustpilot reviews
Loaded 100 establishments
Preprocessing data...
Combined dataset created with 15956 reviews
Filtering reviews to keep only hair transplantation related content...
Filtered out 123 reviews unrelated to hair transplantation
Remaining reviews: 15833
Data preprocessing completed


## 1. Basic Metrics Analysis

In [3]:
# Compute the basic metrics, then preview a fixed set of headline columns.
basic_metrics = analyzer.calculate_basic_metrics()

summary_columns = [
    'title',
    'total_reviews',
    'avg_rating',
    'rating_std',
    'response_rate',
    'avg_response_time',
]

# Heading and separator rule are emitted by a single print call.
print("\nBasic Metrics Summary:", "-" * 80, sep="\n")
display(basic_metrics[summary_columns].head(10))

Calculating basic metrics...
Basic metrics calculated for 53 establishments

Basic Metrics Summary:
--------------------------------------------------------------------------------


Unnamed: 0,title,total_reviews,avg_rating,rating_std,response_rate,avg_response_time
0,Estetistanbul Tıp Merkezi & Saç Ekimi,99,4.494949,1.215325,3.030303,35.333333
1,Self Hair Clinic - Saç Ekimi - Hair Transplant...,16,4.9375,0.25,25.0,3.75
2,Live Hair Clinic İstanbul Saç Ekim Merkezi,2,5.0,0.0,50.0,2.0
3,Estesie Clinic | İstanbul Medikal Estetik Merk...,69,4.942029,0.291248,21.73913,62.9375
4,Saç ekimi Türkiye,76,4.894737,0.555673,98.684211,12.103448
5,BAC Clinic,73,4.986301,0.117041,43.835616,21.608696
6,Art of Med Clinic - Saç Ekimi & Hair Transplant,85,4.988235,0.108465,76.470588,35.088608
7,Mega Hair Trans Hair Transplant Center Etiler,85,4.188235,1.592349,1.176471,0.0
8,Elit İstanbul Hair Transplant,97,4.876289,0.599613,11.340206,19.75
9,Hair Transplant - Holiday Estetic - Best Hair ...,102,4.901961,0.571775,79.411765,37.842105


In [4]:
# Show the basic metrics of a single establishment, selected by its placeId.
# The ID is defined once so the filter and the printed heading cannot drift apart.
TARGET_PLACE_ID = 'ChIJW40wCG-2yhQRuPTD5f7bqIc'

filtered_basic_metrics = basic_metrics[basic_metrics['placeId'] == TARGET_PLACE_ID]

print(f"\nFiltered Basic Metrics for placeId={TARGET_PLACE_ID}:")
print("-" * 80)
if filtered_basic_metrics.empty:
    # Without this notice an unknown or mistyped ID would only show an empty table.
    print(f"No establishment found with placeId={TARGET_PLACE_ID}")
display(filtered_basic_metrics[['title', 'total_reviews', 'avg_rating', 'rating_std', 'response_rate', 'avg_response_time']].head(10))


Filtered Basic Metrics for placeId=ChIJW40wCG-2yhQRuPTD5f7bqIc:
--------------------------------------------------------------------------------


Unnamed: 0,title,total_reviews,avg_rating,rating_std,response_rate,avg_response_time
0,Estetistanbul Tıp Merkezi & Saç Ekimi,99,4.494949,1.215325,3.030303,35.333333


## 2. Sentiment Analysis

In [5]:
# Run the sentiment analysis and preview the first ten rows of its result.
sentiment_metrics = analyzer.calculate_sentiment_metrics()

sentiment_preview = sentiment_metrics.head(10)
print("\nSentiment Analysis Summary:", "-" * 80, sep="\n")
display(sentiment_preview)

Calculating sentiment metrics...
Sentiment metrics calculated for 53 establishments

Sentiment Analysis Summary:
--------------------------------------------------------------------------------


Unnamed: 0,placeId,avg_sentiment,sentiment_std,positive_review_pct,negative_review_pct,neutral_review_pct,sentiment_trend
0,ChIJW40wCG-2yhQRuPTD5f7bqIc,0.538462,0.438863,70.707071,10.10101,4.040404,-0.026023
1,ChIJI184CMjHyhQRa8MxkyaZQqc,0.906587,0.103418,100.0,0.0,0.0,-0.025725
2,ChIJOcutT1HHyhQREVaU7GyaLhY,0.88565,0.076863,100.0,0.0,0.0,0.0
3,ChIJrQMnfaijyhQR_cFRj9KRFcA,0.844353,0.15383,79.710145,0.0,0.0,0.026431
4,ChIJGzSHbVa2yhQRxBFFkJlornE,0.827289,0.285869,84.210526,1.315789,1.315789,0.098961
5,ChIJ2bQmP2SxyhQR6XYGA-d8Wg8,0.70369,0.363511,73.972603,4.109589,6.849315,0.151485
6,ChIJh_ysVV4u-QYRoqwbKYJhh3c,0.873336,0.157051,91.764706,0.0,0.0,-0.005236
7,ChIJSUME9RS2yhQR_KS7gwoQ6_s,0.611023,0.52054,69.411765,10.588235,1.176471,0.194201
8,ChIJ9fU1i3qjyhQRS5aPjuMhhfs,0.815783,0.296042,83.505155,2.061856,3.092784,0.036998
9,ChIJ9TH-BX_FyhQRhUh02JKvMu8,0.791104,0.289531,66.666667,0.980392,1.960784,-0.052267


## 3. Aspect-Based Sentiment Analysis

In [6]:
# Run the aspect-based sentiment analysis and preview the first ten rows.
aspect_metrics = analyzer.calculate_aspect_sentiment()

aspect_preview = aspect_metrics.head(10)
print("\nAspect-Based Sentiment Analysis:", "-" * 80, sep="\n")
display(aspect_preview)

Calculating aspect-based sentiment...
Aspect-based sentiment calculated for 53 establishments

Aspect-Based Sentiment Analysis:
--------------------------------------------------------------------------------


Unnamed: 0,placeId,service_mention_rate,service_sentiment,quality_mention_rate,quality_sentiment,price_mention_rate,price_sentiment,ambiance_mention_rate,ambiance_sentiment,cleanliness_mention_rate,cleanliness_sentiment,location_mention_rate,location_sentiment
0,ChIJW40wCG-2yhQRuPTD5f7bqIc,11.111111,0.295142,21.212121,0.716111,5.050505,1.1102230000000002e-17,2.020202,0.734375,0.0,,0.0,
1,ChIJI184CMjHyhQRa8MxkyaZQqc,50.0,0.265139,56.25,0.66787,25.0,0.4783681,0.0,,12.5,0.124167,0.0,
2,ChIJOcutT1HHyhQREVaU7GyaLhY,0.0,,0.0,,0.0,,0.0,,0.0,,0.0,
3,ChIJrQMnfaijyhQR_cFRj9KRFcA,28.985507,0.4055,36.231884,0.61975,1.449275,0.4333333,1.449275,0.50875,13.043478,0.381543,0.0,
4,ChIJGzSHbVa2yhQRxBFFkJlornE,31.578947,0.44582,27.631579,0.624383,3.947368,0.3983681,5.263158,0.484722,2.631579,0.43783,3.947368,0.123368
5,ChIJ2bQmP2SxyhQR6XYGA-d8Wg8,34.246575,0.413608,32.876712,0.640145,5.479452,0.2079792,2.739726,0.640625,6.849315,0.521889,2.739726,-0.0225
6,ChIJh_ysVV4u-QYRoqwbKYJhh3c,12.941176,0.591677,32.941176,0.586305,1.176471,0.08333333,4.705882,0.283996,1.176471,0.434333,1.176471,0.0
7,ChIJSUME9RS2yhQR_KS7gwoQ6_s,16.470588,0.527136,40.0,0.440205,9.411765,-0.053125,2.352941,0.387333,2.352941,0.412424,0.0,
8,ChIJ9fU1i3qjyhQRS5aPjuMhhfs,23.71134,0.387075,36.082474,0.558523,6.185567,0.342803,5.154639,0.369929,2.061856,0.419792,2.061856,0.012083
9,ChIJ9TH-BX_FyhQRhUh02JKvMu8,25.490196,0.467921,28.431373,0.646811,2.941176,0.1075,3.921569,0.324896,0.980392,0.425833,2.941176,0.01875


## 4. Temporal Pattern Analysis

In [7]:
# Run the temporal pattern analysis and preview the first ten rows.
temporal_metrics = analyzer.analyze_temporal_patterns()

temporal_preview = temporal_metrics.head(10)
print("\nTemporal Pattern Analysis:", "-" * 80, sep="\n")
display(temporal_preview)

Analyzing temporal patterns...
Temporal analysis completed for 52 establishments

Temporal Pattern Analysis:
--------------------------------------------------------------------------------


Unnamed: 0,placeId,rating_trend,sentiment_trend,has_seasonality,seasonal_pattern
0,ChIJW40wCG-2yhQRuPTD5f7bqIc,-0.042754,0.004606,True,Lower ratings in months: 12.
1,ChIJI184CMjHyhQRa8MxkyaZQqc,-0.013636,-0.003267,False,
2,ChIJrQMnfaijyhQR_cFRj9KRFcA,0.014286,0.002097,True,"Lower ratings in months: 2, 4."
3,ChIJGzSHbVa2yhQRxBFFkJlornE,0.007048,0.016069,True,"Lower ratings in months: 5, 10."
4,ChIJ2bQmP2SxyhQR6XYGA-d8Wg8,-0.002747,0.012717,True,Lower ratings in months: 4.
5,ChIJh_ysVV4u-QYRoqwbKYJhh3c,-0.002541,-0.002755,True,Lower ratings in months: 8.
6,ChIJSUME9RS2yhQR_KS7gwoQ6_s,-0.048496,0.011652,True,Higher ratings in months: 3. Lower ratings in ...
7,ChIJ9fU1i3qjyhQRS5aPjuMhhfs,0.009445,0.001117,True,"Lower ratings in months: 2, 11."
8,ChIJ9TH-BX_FyhQRhUh02JKvMu8,-0.046114,-0.00967,True,"Lower ratings in months: 3, 10."
9,ChIJ0VNW2w63yhQR7Fa2A1ZKgJQ,-0.018698,-0.007743,True,Lower ratings in months: 1.


## 5. Review Authenticity Analysis

In [8]:
# Detect review authenticity signals, then run the similarity-outlier analysis.
authenticity_metrics = analyzer.detect_review_authenticity()
# NOTE(review): the return value of this call is discarded, so it is presumably
# kept for its side effects — confirm against ReviewAnalyzer.
analyzer.analyze_similarity_outliers()

# Same heading + separator layout as the other analysis sections.
print("\nReview Authenticity Analysis:")
print("-" * 80)
# Preview only the first rows, like every other section, instead of rendering
# the entire frame.
display(authenticity_metrics.head(10))

Analyzing review authenticity signals...
Error calculating similarity for cluster 2 of ChIJNzo-uY3HyhQR_lRf8KR80e8: empty vocabulary; perhaps the documents only contain stop words
Error calculating similarity for cluster 7 of ChIJNzo-uY3HyhQR_lRf8KR80e8: empty vocabulary; perhaps the documents only contain stop words
Error calculating similarity for cluster 4 of ChIJ97BgC8i3yhQR-nOV2KMsIds: empty vocabulary; perhaps the documents only contain stop words
Error calculating similarity for cluster 10 of ChIJ97BgC8i3yhQR-nOV2KMsIds: empty vocabulary; perhaps the documents only contain stop words
Error calculating similarity for cluster 12 of ChIJ97BgC8i3yhQR-nOV2KMsIds: empty vocabulary; perhaps the documents only contain stop words
Error calculating similarity for cluster 5 of ChIJA_WfM-rHyhQRngnOVFpDbi0: empty vocabulary; perhaps the documents only contain stop words
Error calculating similarity for cluster 14 of ChIJnxs6tamlyhQRKCL5aEDhKmA: empty vocabulary; perhaps the documents only co

Unnamed: 0,placeId,has_review_clusters,cluster_count,largest_cluster_size,avg_cluster_similarity,max_cluster_similarity,suspicious_clusters,similar_review_rate,authenticity_concerns,cluster_similarity_alert
0,ChIJW40wCG-2yhQRuPTD5f7bqIc,True,7,28,0.063918,0.13065,0,0.005978,False,
1,ChIJI184CMjHyhQRa8MxkyaZQqc,False,0,0,0.0,0.0,0,0.0,False,
2,ChIJrQMnfaijyhQR_cFRj9KRFcA,True,5,25,0.044444,0.066685,0,0.0,False,
3,ChIJGzSHbVa2yhQRxBFFkJlornE,True,10,12,0.059481,0.136889,0,0.000702,False,
4,ChIJ2bQmP2SxyhQR6XYGA-d8Wg8,True,3,19,0.114409,0.247013,0,0.0,False,
5,ChIJh_ysVV4u-QYRoqwbKYJhh3c,True,5,7,0.08274,0.110872,0,0.00028,False,
6,ChIJSUME9RS2yhQR_KS7gwoQ6_s,True,2,4,0.134282,0.217632,0,0.00028,False,
7,ChIJ9fU1i3qjyhQRS5aPjuMhhfs,True,5,8,0.059476,0.129603,0,0.0,False,
8,ChIJ9TH-BX_FyhQRhUh02JKvMu8,True,6,27,0.049854,0.094803,0,0.0,False,
9,ChIJ0VNW2w63yhQR7Fa2A1ZKgJQ,True,6,8,0.029218,0.052113,0,0.0,False,


## 6. Comparative Reference Analysis

In [9]:
# Run the comparative reference analysis and preview the first ten rows.
comparative_metrics = analyzer.analyze_comparative_references()

comparative_preview = comparative_metrics.head(10)
print("\nComparative Reference Analysis:", "-" * 80, sep="\n")
display(comparative_preview)

Analyzing comparative references...
Comparative reference analysis completed for 52 establishments

Comparative Reference Analysis:
--------------------------------------------------------------------------------


Unnamed: 0,placeId,comparison_rate,positive_comparison_rate,negative_comparison_rate,total_comparisons,positive_comparison_example,negative_comparison_example
0,ChIJW40wCG-2yhQRuPTD5f7bqIc,0.030303,1.0,0.0,3,best regards,
1,ChIJI184CMjHyhQRa8MxkyaZQqc,0.3125,0.8,0.0,5,i had spent weeks browsing the internet for th...,
2,ChIJrQMnfaijyhQR_cFRj9KRFcA,0.028986,1.0,0.0,2,"he is the best in his job, i highly recommend ...",
3,ChIJGzSHbVa2yhQRxBFFkJlornE,0.078947,1.0,0.0,6,"they did the best they could, i feel very comf...",
4,ChIJ2bQmP2SxyhQR6XYGA-d8Wg8,0.054795,1.0,0.0,4,hair transplant best place in turkey & coopera...,
5,ChIJh_ysVV4u-QYRoqwbKYJhh3c,0.047059,1.0,0.0,4,sadberk and her team for the best of this job,
6,ChIJSUME9RS2yhQR_KS7gwoQ6_s,0.058824,0.8,0.0,5,"hello dear germans and citizens, i got my hair...",
7,ChIJ9fU1i3qjyhQRS5aPjuMhhfs,0.092784,1.0,0.0,9,"excellent treatment and customer service, i li...",
8,ChIJ9TH-BX_FyhQRhUh02JKvMu8,0.04902,1.0,0.0,5,"“after much thought and research, i decided to...",
9,ChIJ0VNW2w63yhQR7Fa2A1ZKgJQ,0.113208,0.75,0.0,12,thank you so much for everything and wish you ...,


## 7. Customer Journey Analysis

In [10]:
# Run the customer journey analysis and preview the first ten rows.
journey_metrics = analyzer.analyze_customer_journey()

journey_preview = journey_metrics.head(10)
print("\nCustomer Journey Analysis:", "-" * 80, sep="\n")
display(journey_preview)

Analyzing customer journey markers...
Customer journey analysis completed for 52 establishments

Customer Journey Analysis:
--------------------------------------------------------------------------------


Unnamed: 0,placeId,first_time_rate,repeat_customer_rate,will_return_rate,wont_return_rate,customer_loyalty_score
0,ChIJW40wCG-2yhQRuPTD5f7bqIc,2.020202,6.060606,0.0,0.0,6.060606
1,ChIJI184CMjHyhQRa8MxkyaZQqc,0.0,12.5,0.0,0.0,12.5
2,ChIJrQMnfaijyhQR_cFRj9KRFcA,0.0,2.898551,0.0,0.0,2.898551
3,ChIJGzSHbVa2yhQRxBFFkJlornE,2.631579,9.210526,0.0,0.0,9.210526
4,ChIJ2bQmP2SxyhQR6XYGA-d8Wg8,0.0,9.589041,1.369863,0.0,10.958904
5,ChIJh_ysVV4u-QYRoqwbKYJhh3c,0.0,8.235294,0.0,0.0,8.235294
6,ChIJSUME9RS2yhQR_KS7gwoQ6_s,0.0,10.588235,0.0,2.352941,8.235294
7,ChIJ9fU1i3qjyhQRS5aPjuMhhfs,2.061856,7.216495,1.030928,0.0,8.247423
8,ChIJ9TH-BX_FyhQRhUh02JKvMu8,0.0,10.784314,0.0,0.980392,9.803922
9,ChIJ0VNW2w63yhQR7Fa2A1ZKgJQ,0.0,15.09434,0.0,0.0,15.09434


## 8. Complaint Resolution Analysis

In [11]:
# Run the complaint resolution analysis and preview the first ten rows.
resolution_metrics = analyzer.analyze_complaint_resolution()

resolution_preview = resolution_metrics.head(10)
print("\nComplaint Resolution Analysis:", "-" * 80, sep="\n")
display(resolution_preview)

Analyzing complaint resolution patterns...
Complaint resolution analysis completed for 52 establishments

Complaint Resolution Analysis:
--------------------------------------------------------------------------------


Unnamed: 0,placeId,complaint_count,complaint_rate,complaint_response_rate,resolution_language_rate,complaint_response_time
0,ChIJW40wCG-2yhQRuPTD5f7bqIc,10,10.10101,0.0,0.0,
1,ChIJI184CMjHyhQRa8MxkyaZQqc,2,12.5,50.0,0.0,1.0
2,ChIJrQMnfaijyhQR_cFRj9KRFcA,2,2.898551,100.0,0.0,65.5
3,ChIJGzSHbVa2yhQRxBFFkJlornE,7,9.210526,100.0,14.285714,0.333333
4,ChIJ2bQmP2SxyhQR6XYGA-d8Wg8,1,1.369863,0.0,0.0,0.0
5,ChIJh_ysVV4u-QYRoqwbKYJhh3c,6,7.058824,83.333333,0.0,14.8
6,ChIJSUME9RS2yhQR_KS7gwoQ6_s,5,5.882353,0.0,0.0,
7,ChIJ9fU1i3qjyhQRS5aPjuMhhfs,2,2.061856,50.0,0.0,12.0
8,ChIJ9TH-BX_FyhQRhUh02JKvMu8,8,7.843137,75.0,0.0,125.5
9,ChIJ0VNW2w63yhQR7Fa2A1ZKgJQ,8,7.54717,87.5,0.0,31.428571


## 9. Topic Modeling

In [12]:
# Perform topic modeling analysis.
# This step is currently disabled: every statement in this cell is commented
# out, so the cell produces no output. Uncomment the lines below to run it.
# NOTE(review): the reason it was disabled is not recorded here — confirm
# before re-enabling.
#
# topic_modeling_metrics = analyzer.perform_topic_modeling()
# print("\nTopic Modeling Analysis:")
# print("-" * 80)
# display(topic_modeling_metrics)

## 10. Word Choice Analysis

## 11. Named Entities

## 12. Composite Score, Final Ranking and Insights

In [13]:
# Build the composite ranking and the insights mapping from the analyzer.
results = analyzer.create_composite_score()
insights = analyzer.generate_insights()

print("\nTop 10 Establishments:")
print("-" * 80)
# Highest final_score first, so head(10) shows the ten best-scoring rows.
results = results.sort_values(by='final_score', ascending=False)
display(results.head(10))


def _print_bullets(heading, items):
    """Print ``heading`` followed by one ``- item`` line per entry of ``items``.

    When ``items`` is empty an explicit placeholder line is printed, so the
    section does not read as truncated or missing output.
    """
    print(heading)
    if not items:
        print("- None identified")
        return
    for item in items:
        print(f"- {item}")


print("\nKey Insights for Top Establishments:")
print("-" * 80)
# Only the first five entries are reported. The mapping's keys are not needed
# for the printout, so iterate over the values alone.
for insight in list(insights.values())[:5]:
    print(f"\n{insight['name']} (Rank: {insight['rank']}, Score: {insight['score']:.2f})")
    _print_bullets("Strengths:", insight['strengths'])
    _print_bullets("Areas for Improvement:", insight['improvements'])

Creating composite scores and rankings...
Rankings created for 53 establishments
Generating insights for top establishments...

Top 10 Establishments:
--------------------------------------------------------------------------------


Unnamed: 0,placeId,title,final_score,rank,avg_rating,total_reviews,avg_sentiment,customer_loyalty_score
51,ChIJZYs6o7LIyhQR-sGxFoifWLw,Smile Hair Clinic | Hair Transplant Turkey Ist...,100.0,1,4.854545,165,0.835015,22.424242
4,ChIJGzSHbVa2yhQRxBFFkJlornE,Saç ekimi Türkiye,93.77,2,4.894737,76,0.827289,9.210526
6,ChIJh_ysVV4u-QYRoqwbKYJhh3c,Art of Med Clinic - Saç Ekimi & Hair Transplant,93.69,3,4.988235,85,0.873336,8.235294
34,ChIJGcral8i3yhQRivzXke-APws,Medical of Istanbul Hospital Hair Transplantat...,93.17,4,4.978022,182,0.854678,10.989011
16,ChIJEaXKVOnEyhQR0itjBItvB6Y,Este Sağlık Saç Ekim Merkezi | Hair Transplant...,91.44,5,4.948052,154,0.827809,9.090909
47,ChIJ12WcZ_e3yhQRnsEKjsPkqQI,Hermest Hair Transplant Turkey,90.64,6,4.936401,739,0.829673,16.508796
1,ChIJI184CMjHyhQRa8MxkyaZQqc,Self Hair Clinic - Saç Ekimi - Hair Transplant...,87.74,7,4.9375,16,0.906587,12.5
32,ChIJ7XQxBazHyhQRlpxiROkRvHw,History Clinic (History Hair - History Dent),87.2,8,4.901639,183,0.839351,13.114754
50,ChIJuQbq64ewyhQRSEKVkD785Vk,SULE CLINIC - Hair Transplant Turkey istanbul,84.98,9,4.896552,1479,0.817873,11.764706
10,ChIJ0VNW2w63yhQR7Fa2A1ZKgJQ,HairextreM & Haydar Aslan Saç Ekimi,84.09,10,4.716981,106,0.758367,15.09434



Key Insights for Top Establishments:
--------------------------------------------------------------------------------

Smile Hair Clinic | Hair Transplant Turkey Istanbul (Rank: 1, Score: 100.00)
Strengths:
- Exceptional average rating of 4.9/5
- Very high positive sentiment (97.6%)
- Highly rated quality
Areas for Improvement:

Saç ekimi Türkiye (Rank: 2, Score: 93.77)
Strengths:
- Exceptional average rating of 4.9/5
- Highly rated quality
Areas for Improvement:
- Reduce response time to reviews

Art of Med Clinic - Saç Ekimi & Hair Transplant (Rank: 3, Score: 93.69)
Strengths:
- Exceptional average rating of 5.0/5
- Very high positive sentiment (91.8%)
- Highly rated service
- Highly rated quality
Areas for Improvement:
- Reduce response time to reviews

Medical of Istanbul Hospital Hair Transplantation & Dental & Aesthetic (Rank: 4, Score: 93.17)
Strengths:
- Exceptional average rating of 5.0/5
- Very high positive sentiment (90.7%)
- Highly rated quality
Areas for Improvement:
- I

## 13. Save Results

In [14]:
# Hand the output directory name to the analyzer's save step, then confirm
# the destination to the reader. The name is defined once and reused in the message.
output_dir = "analysis_results"
analyzer.save_results(output_dir)
print(f"\nAll analysis results have been saved to the '{output_dir}' directory.")

Saving results...
Enhanced rankings with scoring details saved to analysis_results/establishment_rankings_20250407_221231.xlsx
Full metrics saved to analysis_results/establishment_metrics_20250407_221231.xlsx
Insights saved to analysis_results/establishment_insights_20250407_221231.json
All results saved successfully

All analysis results have been saved to the 'analysis_results' directory.
