<a href="https://colab.research.google.com/github/zaib2003/Analysis/blob/main/internship.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

try:
    import chardet
except ImportError:
    import subprocess
    import sys
    subprocess.check_call([sys.executable, "-m", "pip", "install", "chardet"])
    import chardet

from transformers import pipeline
from textblob import TextBlob
import pandas as pd
import nltk
import os

# Setup: fetch the NLTK 'punkt' resource each time this code is run.
# NOTE(review): presumably required by TextBlob's tokenization -- TODO confirm.
nltk.download('punkt')

def detect_encoding(file_path):
    """Guess the character encoding of a file with chardet.

    Args:
        file_path: Path of the file to inspect; it is opened in binary mode
            and read in full.

    Returns:
        The value stored under the 'encoding' key of chardet's detection
        result for the file's bytes.
    """
    with open(file_path, 'rb') as handle:
        raw_bytes = handle.read()
    detection = chardet.detect(raw_bytes)
    return detection['encoding']

def find_text_column(df):
    """Pick the DataFrame column most likely to contain free text.

    Columns are examined in their DataFrame order, in three passes:

    1. a column whose lower-cased name equals one of the known keywords;
    2. a column whose lower-cased name contains one of the keywords;
    3. the first column whose lower-cased name does not contain 'id'.

    Args:
        df: DataFrame whose column labels are inspected. Labels may be of
            any type; they are compared through their string form.

    Returns:
        The original column label of the selected column, or None when no
        column qualifies.
    """
    # Column names that likely contain text
    priority_columns = ('text', 'content', 'review', 'comment', 'tweet', 'message')

    # Labels are not guaranteed to be strings (pandas assigns integer labels
    # when a CSV is read without a header row), so normalise through str()
    # once instead of calling .lower() on the raw label.
    labelled = [(col, str(col).lower()) for col in df.columns]

    # First try exact matches (case insensitive)
    for col, name in labelled:
        if name in priority_columns:
            return col

    # Then try partial matches
    for col, name in labelled:
        if any(keyword in name for keyword in priority_columns):
            return col

    # If still not found, return the first non-ID column
    for col, name in labelled:
        if 'id' not in name:
            return col
    return None

def analyze_comment(comment):
    """Compute sentiment and emotion scores for a single comment.

    Sentiment polarity comes from TextBlob; emotion scores come from the
    module-level ``emotion_classifier`` pipeline.

    Args:
        comment: The text to analyse. Non-string values are converted with
            ``str()``; None and NaN are treated as missing.

    Returns:
        A dict with 'Original_Text', 'Sentiment' ('Positive', 'Negative' or
        'Neutral'), 'Sentiment_Score' (polarity rounded to 4 places) and one
        entry per emotion label (capitalised label -> score rounded to 4
        places). Returns None when the comment is missing, blank, or when
        analysis raises.
    """
    # Missing values would otherwise be stringified and analysed as the
    # literal text "None" / "nan". NaN is the only float not equal to itself.
    if comment is None or (isinstance(comment, float) and comment != comment):
        return None

    try:
        comment = str(comment).strip()
        if not comment:
            return None

        sentiment_score = TextBlob(comment).sentiment.polarity
        if sentiment_score > 0:
            sentiment = 'Positive'
        elif sentiment_score < 0:
            sentiment = 'Negative'
        else:
            sentiment = 'Neutral'

        # Truncate over-long inputs; without this a comment exceeding the
        # model's input limit raises and is silently dropped by the handler
        # below. 512 is assumed to be the limit of the BERT-base model the
        # pipeline is loaded with -- adjust if the model changes.
        emotions = emotion_classifier(comment, truncation=True, max_length=512)[0]
        emotion_scores = {e['label'].capitalize(): round(e['score'], 4) for e in emotions}

        return {
            'Original_Text': comment,
            'Sentiment': sentiment,
            'Sentiment_Score': round(sentiment_score, 4),
            **emotion_scores
        }
    except Exception as e:
        # Best-effort boundary: one bad comment must not abort a whole batch.
        print(f"Error analyzing comment: {str(e)}")
        return None

def analyze_user_comment():
    """Interactively analyse comments typed by the user.

    Repeatedly prompts for a comment, prints the result of
    ``analyze_comment`` for it, and asks whether to continue. The loop ends
    when the user types 'exit' at the comment prompt or answers anything
    other than 'y' to the follow-up question. Blank input is rejected with a
    message and the prompt is shown again.
    """
    separator = "\n" + "=" * 50
    keep_going = True

    while keep_going:
        print(separator)
        text = input("Enter a comment to analyze (or type 'exit' to quit): ")

        if text.lower() == 'exit':
            return

        if text.strip() == "":
            print("Please enter a valid comment.")
            continue

        analysis = analyze_comment(text)
        if analysis:
            print("\nAnalysis Results:")
            for field, val in analysis.items():
                if field != 'Original_Text':
                    print(f"{field}: {val}")
                    continue
                # Long originals are shortened to 100 characters for display.
                suffix = '...' if len(val) > 100 else ''
                print(f"{field}: {val[:100]}{suffix}")

        answer = input("\nAnalyze another comment? (y/n): ")
        keep_going = answer.lower() == 'y'

def process_csv_file(input_file="sentiment-analysis.csv",
                     output_file="data/analyzed_comments.csv"):
    """Analyse every comment in a CSV file and save the results as CSV.

    The file's encoding is detected with ``detect_encoding``, the text
    column is chosen with ``find_text_column``, and each non-null value of
    that column is passed to ``analyze_comment``. Successful results are
    written to ``output_file`` and a short sample is printed.

    Args:
        input_file: Path of the CSV file to read.
        output_file: Path of the CSV file to write; its parent directory is
            created if needed.

    Returns:
        None. Progress and errors are reported with ``print``; any exception
        raised while processing is caught and printed rather than propagated.
    """
    try:
        # Read file
        encoding = detect_encoding(input_file)
        print(f"\nDetected encoding: {encoding}")

        df = pd.read_csv(input_file, encoding=encoding)
        # Column labels may be non-strings (e.g. integers), so stringify
        # them before joining.
        print(f"\nAvailable columns: {', '.join(map(str, df.columns))}")

        # Find appropriate text column. Compare against None explicitly so a
        # falsy but valid label such as 0 is not rejected.
        text_column = find_text_column(df)
        if text_column is None:
            raise ValueError("Could not find suitable text column")

        print(f"\nAnalyzing column: '{text_column}'")

        comments = df[text_column].dropna()
        # Preview the first usable value. Going through dropna() and str()
        # keeps a missing or non-string first row (or an empty file) from
        # aborting the whole run.
        if not comments.empty:
            print(f"First sample: {str(comments.iloc[0])[:50]}...")

        # Analyze; analyze_comment returns None for comments it cannot handle
        results = [result for result in map(analyze_comment, comments) if result]

        if not results:
            print("❌ No valid comments were analyzed")
            return

        # Save results
        output_dir = os.path.dirname(output_file)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        pd.DataFrame(results).to_csv(output_file, index=False, encoding='utf-8')

        print(f"\n✅ Successfully analyzed {len(results)}/{len(df)} comments")
        print(f"Results saved to {output_file}")

        # Show a small sample of the output
        sample = pd.DataFrame(results[:3])
        print("\nSample results:")
        print(sample.to_string(index=False))

    except Exception as e:
        # Top-level boundary for the menu: report and return to the caller.
        print(f"❌ Error processing file: {str(e)}")

# Build the shared emotion classifier once, when this code is first run;
# analyze_comment() uses this module-level object.
print("Loading emotion classifier (this may take a moment)...")
emotion_classifier = pipeline(
    task="text-classification",
    model="nateraw/bert-base-uncased-emotion",
    top_k=None,  # ask for a score for every label rather than only the best one
)

# Main menu
def main():
    """Run the interactive text menu until the user chooses to exit.

    Option 1 runs ``process_csv_file``, option 2 runs
    ``analyze_user_comment`` and option 3 leaves the loop. Any other input
    prints a hint and shows the menu again.
    """
    rule = "=" * 50
    # Menu choices that simply delegate to another function.
    actions = {
        '1': process_csv_file,
        '2': analyze_user_comment,
    }

    while True:
        print("\n" + rule)
        print("SENTIMENT AND EMOTION ANALYSIS TOOL")
        print(rule)
        print("\n1. Analyze CSV file (sentiment-analysis.csv)")
        print("2. Analyze user input comments")
        print("3. Exit")

        choice = input("\nEnter your choice (1-3): ")

        if choice == '3':
            print("Exiting program...")
            return

        action = actions.get(choice)
        if action is None:
            print("Invalid choice. Please enter 1, 2, or 3.")
            continue

        action()

# Start the interactive menu only when this code is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Loading emotion classifier (this may take a moment)...


Device set to use cpu



SENTIMENT AND EMOTION ANALYSIS TOOL

1. Analyze CSV file (sentiment-analysis.csv)
2. Analyze user input comments
3. Exit

Enter your choice (1-3): 2

Enter a comment to analyze (or type 'exit' to quit): how are you ?

Analysis Results:
Original_Text: how are you ?
Sentiment: Neutral
Sentiment_Score: 0.0
Joy: 0.9296
Sadness: 0.022
Anger: 0.0212
Fear: 0.0125
Love: 0.0079
Surprise: 0.0068

Analyze another comment? (y/n): y

Enter a comment to analyze (or type 'exit' to quit): i am angry

Analysis Results:
Original_Text: i am angry
Sentiment: Negative
Sentiment_Score: -0.5
Anger: 0.9926
Fear: 0.0027
Sadness: 0.0017
Surprise: 0.0012
Love: 0.001
Joy: 0.0008

Analyze another comment? (y/n): n

SENTIMENT AND EMOTION ANALYSIS TOOL

1. Analyze CSV file (sentiment-analysis.csv)
2. Analyze user input comments
3. Exit

Enter your choice (1-3): 3
Exiting program...
