In [None]:
import sys
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Make the sibling ``scripts`` directory importable so the project modules
# imported below (scraper, preprocessor, ...) can be resolved.
module_path = os.path.abspath(os.path.join(os.pardir, 'scripts'))
if sys.path.count(module_path) == 0:
    sys.path += [module_path]

#import modules
from scraper import main as run_scraper
from preprocessor import ReviewPreprocessor
from sentiment_analysis import BankReviewAnalyzer
from visualizer import BankDataVisualizer


In [None]:
# Stage 1: scraping. Announce the start so the cell output is easy to scan.
print("[ Starting Scraper... ]")

# Run the scraper entry point (``main`` imported from the project's
# ``scraper`` module) and keep whatever it returns. The ``.head()`` call
# below assumes the result is DataFrame-like -- TODO confirm against scraper.main.
raw_df = run_scraper()

print("\n[INFO] ------   Scraping Finished.   ------")
# Show only the first rows instead of dumping the whole result.
display(raw_df.head())

In [None]:

# Stage 2: preprocessing. ``process()`` is treated as returning a truthy
# value on success; its inputs/outputs are defined by ReviewPreprocessor.
preprocessor = ReviewPreprocessor()
success = preprocessor.process()

if not success:
    print(" Preprocessing failed.")
else:
    print("\n Preprocessing finished successfully!")
    # Keep a handle on the processed data exposed by the preprocessor.
    df = preprocessor.df

In [None]:
# Stage 3: sentiment analysis over the processed reviews file.
print("===== sentiment analysis started =====")
analyzer = BankReviewAnalyzer()  # initialise the analyzer

# The previous ``if analyzer:`` check tested the object itself, which is
# truthy as soon as it is constructed, so the failure message could never be
# printed. Report the outcome of the analysis call instead: print the failure
# message when it raises, then re-raise so the notebook still stops here.
try:
    analyzer.run_complete_multi_bank_analysis('./data/processed/reviews_processed.csv')  # perform the complete analysis
except Exception:
    print("\n sentiment alalysis failed.")
    raise
else:
    print("\n sentiment analysis finished successfully!")

In [None]:
# Stage 4: visualization. Build the project's visualizer with "bank_reports"
# as its argument (presumably the reports directory -- confirm against
# BankDataVisualizer) and invoke its ``run_all()`` entry point.

viz = BankDataVisualizer("bank_reports")
viz.run_all()

In [None]:

# Make the sibling ``config`` directory importable so that
# ``from config import DB_CONFIG_`` below can be resolved.
module_path = os.path.abspath(os.path.join(os.pardir, 'config'))
if sys.path.count(module_path) == 0:
    sys.path += [module_path]

import psycopg2 
from psycopg2.extras import execute_values
import pandas as pd
from config import DB_CONFIG_

In [None]:
# Translate the project's config keys (DB_*) into the keyword names that
# ``psycopg2.connect(**DB_CONFIG)`` is called with further down.
DB_CONFIG = {
    connect_kwarg: DB_CONFIG_[config_key]
    for connect_kwarg, config_key in (
        ('host', 'DB_HOST'),
        ('database', 'DB_NAME'),
        ('user', 'DB_USER'),
        ('password', 'DB_PASSWORD'),
        ('port', 'DB_PORT'),
    )
}

# Bank/app metadata used to populate the ``banks`` table.
df_bank_data = pd.read_csv("./data/raw/app_info.csv")
# Directory scanned later for the per-bank review CSV files.
CSV_DIR = "./bank_reports"

In [None]:
# Open the PostgreSQL connection and cursor shared by the loading cells.
#
# Both handles are pre-bound to None so that the ``if conn and cur:`` guards
# in later cells skip cleanly when the connection fails, instead of raising
# NameError on an unbound name.
conn = None
cur = None
try:
    conn = psycopg2.connect(**DB_CONFIG)
    cur = conn.cursor()
    print("database connected successfully")
except Exception as e:
    # Surface the driver's message; a bare "error" gives nothing to act on.
    print(f"error connecting to database: {e}")
    # If the connection opened but the cursor could not be created, do not
    # leave a half-initialised connection behind.
    if conn is not None:
        conn.close()
        conn = None

In [None]:
print(f"loaded {len(df_bank_data)} bank data")
# Insert one row per bank into ``banks``. ON CONFLICT DO NOTHING keeps the
# cell re-runnable when the banks already exist.
if conn and cur:
    failed_rows = 0
    for _, row in df_bank_data.iterrows():
        # PostgreSQL aborts the whole transaction after any failed statement,
        # which would make every later insert fail and the final commit
        # discard all rows. Wrapping each insert in a savepoint confines a
        # failure to the offending row.
        cur.execute("SAVEPOINT bank_insert")
        try:
            cur.execute(
                """insert into banks (bank_name,app_name)
                 VALUES (%s, %s)
                ON CONFLICT (bank_name) DO NOTHING""",
                (row['bank_name'], row['title'])
            )
        except Exception as e:
            cur.execute("ROLLBACK TO SAVEPOINT bank_insert")
            failed_rows += 1
            print(f"error inserting data for bank {row.get('bank_name')!r}: {e}")
        cur.execute("RELEASE SAVEPOINT bank_insert")
    conn.commit()
    # Only claim success when every row went through.
    if failed_rows:
        print(f"{failed_rows} bank row(s) could not be inserted")
    else:
        print("data inserted successfully!")

In [None]:
# Build a bank_name -> bank_id lookup from the ``banks`` table, but only when
# both a connection and a cursor are available.
if conn and cur:
    cur.execute("SELECT bank_id, bank_name FROM banks")
    bank_mapping = {name: identifier for identifier, name in cur.fetchall()}

    # Echo the mapping so the ids used by the review loader can be checked.
    print("Bank mapping (bank_name -> bank_id):")
    for bank_name, bank_id in bank_mapping.items():
        print(f"  {bank_name}: {bank_id}")


In [None]:
# Rebuild the bank_name -> bank_id lookup, this time normalising every name
# with str(...).strip() so it can be compared with the stripped bank names
# read from the report CSVs.
# NOTE(review): this overwrites the ``bank_mapping`` built in the previous
# cell and, unlike that cell, runs without the ``if conn and cur`` guard.
cur.execute("SELECT bank_id, bank_name FROM banks")
bank_mapping = {str(bank_name).strip(): bank_id for bank_id, bank_name in cur.fetchall()}

In [None]:
import glob

# Load every per-bank report CSV from CSV_DIR into the ``reviews`` table.
# Each file is expected to carry a ``bank_name`` column whose first value
# identifies the bank for all of its rows.

REVIEW_INSERT_SQL = """
INSERT INTO reviews (
    review_id, bank_id, review_text, review_date,
    sentiment_label, sentiment_score, source_
) VALUES %s
ON CONFLICT (review_id) DO NOTHING
"""


def _cell_or_default(row, column, default):
    """Return ``row[column]``, or ``default`` if the column is absent or the cell is NaN.

    ``row.get(column, default)`` alone only covers a missing column; an empty
    CSV cell is read by pandas as NaN and would otherwise be stored as NaN
    (score) or the string 'nan' (label / source).
    """
    value = row.get(column, default)
    return default if pd.isna(value) else value


# Get CSV files
csv_files = glob.glob(os.path.join(CSV_DIR, "*.csv"))
total_inserted = 0

# Case-insensitive fallback index, built once. setdefault keeps the first
# database entry when two names differ only by case.
bank_ids_by_lower_name = {}
for db_name, db_id in bank_mapping.items():
    bank_ids_by_lower_name.setdefault(db_name.lower(), db_id)

try:
    for csv_file in csv_files:
        df = pd.read_csv(csv_file)
        # An empty file has no first row to read the bank name from.
        if df.empty:
            print(f"Skipping {csv_file}: file contains no rows")
            continue
        bank_name = str(df['bank_name'].iloc[0]).strip()

        # Find bank_id: exact match first, then case-insensitive. Compare
        # against None so that a legitimate id of 0 is not treated as missing.
        bank_id = bank_mapping.get(bank_name)
        if bank_id is None:
            bank_id = bank_ids_by_lower_name.get(bank_name.lower())

        if bank_id is None:
            print(f"Skipping {csv_file}: Bank '{bank_name}' not found in database")
            continue

        # Prepare data: one tuple per review, in the column order of the SQL.
        insert_data = [
            (
                row['review_id'],
                bank_id,
                str(row['review_text']),
                row['review_date'],
                str(_cell_or_default(row, 'sentiment_label', 'neutral')),
                float(_cell_or_default(row, 'sentiment_score', 0.0)),
                str(_cell_or_default(row, 'source', 'bank_reports')),
            )
            for _, row in df.iterrows()
        ]

        # Insert data. execute_values sends the rows in pages (100 by
        # default) and cur.rowcount then only reflects the LAST page, so the
        # whole file is sent as a single page to get an accurate count.
        execute_values(
            cur,
            REVIEW_INSERT_SQL,
            insert_data,
            page_size=len(insert_data),
        )

        inserted = cur.rowcount
        total_inserted += inserted
        conn.commit()

        print(f"Inserted {inserted} reviews for {bank_name}")

    print(f"\nTotal reviews inserted: {total_inserted}")
finally:
    # Always release the database handles, even when a file fails to load.
    cur.close()
    conn.close()

In [None]:
# Connect to PostgreSQL again for the read-only check below (the loading cell
# closes its connection when it finishes).
#
# Reuse the DB_CONFIG built from the project config rather than repeating the
# host, user and password as literals: credentials must not live in the
# notebook, and both connections should target the same database.
conn = psycopg2.connect(**DB_CONFIG)

# psycopg2.connect() raises on failure, so reaching this line means the
# connection is open; no truthiness check is needed.
print("database conected successfully")

In [None]:
# Displaying results

def run_query(sql):
    """Run ``sql`` on the notebook's open ``conn`` and return the result as a DataFrame."""
    return pd.read_sql(sql, conn)

sql = "select * from banks"

# Jupyter only renders the LAST expression of a cell, so the query result was
# silently dropped when the close() calls followed it. Display it explicitly,
# and close the handles even if the query fails.
try:
    display(run_query(sql))
finally:
    cur.close()
    conn.close()