### Task 7: Internship Feedback Sentiment Analysis

In [2]:
#Step 1: Import Libraries

In [16]:
import pandas as pd
import numpy as np
import re
import string

In [None]:
#Step 2: Load Dataset

In [10]:
# Load the raw internship feedback export into a DataFrame; the file is
# looked up relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="internship_feedback_dataset_large.csv")


In [12]:
# Quick sanity check of what was loaded: (rows, columns) followed by the
# first five records.
dataset_shape = df.shape
first_rows = df.head(5)
print("Dataset Shape:", dataset_shape)
print(first_rows)


Dataset Shape: (2500, 7)
  Feedback_ID        Date Intern_ID      Department       Source  \
0    FB000001  2025-04-01  INT00197    Software Dev       Survey   
1    FB000002  2024-11-01  INT00219  Data Analytics  Google Form   
2    FB000003  2025-08-01  INT00045           Sales  Google Form   
3    FB000004  2025-03-01  INT00039       Marketing        Email   
4    FB000005  2025-01-01  INT00093       Marketing  Google Form   

                                      Feedback_Text True_Label  
0     Tasks were boring and not related to my role.   Negative  
1  Workload was normal, nothing unusual. Thank you.    Neutral  
2       Poor communication and management issues. ðŸ˜ž   Negative  
3  Great learning experience and supportive mentor.   Positive  
4    The team was very helpful and I learned a lot.   Positive  


In [14]:
#Step 3: Clean Text

In [18]:
# Translation table that deletes every ASCII punctuation character; built once
# here instead of on every clean_text() call.
_PUNCTUATION_TABLE = str.maketrans("", "", string.punctuation)


def clean_text(text):
    """Normalise one feedback entry for token matching.

    Steps, in order: lowercase, drop URLs (anything starting with ``http``
    or ``www`` up to the next whitespace), delete ASCII punctuation, and
    collapse runs of whitespace into single spaces.

    Parameters
    ----------
    text : object
        The raw feedback. Non-string values are converted with ``str()``.
        ``None`` and float NaN (how pandas represents an empty CSV cell) are
        treated as missing.

    Returns
    -------
    str
        The cleaned text, or ``""`` when the input is missing.
    """
    # Missing values must not be stringified: str(None) / str(nan) would
    # otherwise leak the bogus tokens "none" / "nan" into the analysis.
    # (NaN is the only float that is not equal to itself.)
    if text is None or (isinstance(text, float) and text != text):
        return ""

    text = str(text).lower()
    # Links are removed before punctuation so the URL is still one
    # whitespace-delimited chunk when the pattern runs.
    text = re.sub(r"http\S+|www\S+", "", text)
    # Only ASCII punctuation (string.punctuation) is removed; other symbols,
    # e.g. emoji, pass through unchanged.
    text = text.translate(_PUNCTUATION_TABLE)
    text = re.sub(r"\s+", " ", text).strip()
    return text

# Store a cleaned copy of each feedback entry next to the raw text.
cleaned_feedback = df["Feedback_Text"].apply(clean_text)
df["Clean_Text"] = cleaned_feedback

# Show raw vs. cleaned text for the first three rows as a spot check.
cleaning_preview = df[["Feedback_Text","Clean_Text"]].head(3)
print("\n Cleaned Text Example:")
print(cleaning_preview)


 Cleaned Text Example:
                                      Feedback_Text  \
0     Tasks were boring and not related to my role.   
1  Workload was normal, nothing unusual. Thank you.   
2       Poor communication and management issues. ðŸ˜ž   

                                      Clean_Text  
0   tasks were boring and not related to my role  
1  workload was normal nothing unusual thank you  
2     poor communication and management issues ðŸ˜ž  


In [20]:
#Step 4: Sentiment Scoring + Classification

In [22]:
# Lexicon of tokens that each add +1 to a feedback entry's sentiment tally.
# Entries are lowercase and punctuation-free so they match cleaned tokens.
positive_words = {
    "great", "good", "excellent", "amazing",
    "helpful", "supportive", "enjoyed", "enjoy",
    "satisfied", "friendly", "productive", "timely",
    "constructive", "learned", "learning", "clear",
    "meaningful", "happy", "improved", "guidance",
    "exposure",
}

# Lexicon of tokens that each subtract 1 from the sentiment tally.
# NOTE(review): "valued" normally reads as positive and "workload" as neutral;
# presumably they were added to catch specific phrases in this dataset
# (e.g. negated "valued") -- confirm against the data before reusing elsewhere.
# NOTE(review): "not" is counted as a negative word on its own, so negation of
# a negative term ("not bad") is not recognised.
negative_words = {
    "lack", "unclear", "workload", "unrealistic",
    "stressful", "disorganized", "boring", "poor",
    "issues", "late", "insufficient", "confusing",
    "disappointing", "valued", "deadline", "deadlines",
    "not",
}

def sentiment_score(text):
    """Return a length-normalised lexicon sentiment score for ``text``.

    The text is cleaned with ``clean_text`` and split on whitespace. Every
    token found in ``positive_words`` adds 1 to the tally and every token
    found in ``negative_words`` subtracts 1. The tally is divided by the
    token count plus 1e-6.

    Returns
    -------
    float
        Positive when positive-lexicon tokens outnumber negative ones,
        negative in the opposite case, 0.0 when they balance or when the
        text has no tokens.
    """
    words = clean_text(text).split()

    tally = 0
    for word in words:
        # Two independent checks: a word present in both lexicons would
        # contribute +1 and -1, i.e. cancel out.
        if word in positive_words:
            tally += 1
        if word in negative_words:
            tally -= 1

    # The small epsilon keeps the division defined when there are no tokens.
    return tally / (len(words) + 1e-6)

def classify(score, threshold=0.02):
    """Map a numeric sentiment score to a sentiment label.

    Parameters
    ----------
    score : float
        Sentiment score, e.g. the value returned by ``sentiment_score``.
    threshold : float, optional
        Half-width of the neutral band around zero. Scores strictly above
        ``threshold`` are "Positive", scores strictly below ``-threshold``
        are "Negative", everything else (including the boundaries and NaN)
        is "Neutral". Defaults to 0.02.

    Returns
    -------
    str
        One of "Positive", "Negative" or "Neutral".

    Raises
    ------
    ValueError
        If ``threshold`` is negative, because the positive and negative
        bands would then overlap.
    """
    if threshold < 0:
        raise ValueError("threshold must be non-negative")

    if score > threshold:
        return "Positive"
    if score < -threshold:
        return "Negative"
    # Falls through for in-band scores and for NaN (both comparisons False).
    return "Neutral"

# Score each raw feedback entry (sentiment_score cleans the text itself),
# then turn every score into a Positive / Negative / Neutral label.
feedback_scores = df["Feedback_Text"].apply(sentiment_score)
df["Sentiment_Score"] = feedback_scores
df["Predicted_Sentiment"] = df["Sentiment_Score"].apply(classify)

# Print the first ten results as a plain-text table without the row index.
sample_results = df[["Feedback_Text","Sentiment_Score","Predicted_Sentiment"]].head(10)
print("\n Sentiment Analysis Sample Results:")
print(sample_results.to_string(index=False))



 Sentiment Analysis Sample Results:
                                    Feedback_Text  Sentiment_Score Predicted_Sentiment
    Tasks were boring and not related to my role.        -0.222222            Negative
 Workload was normal, nothing unusual. Thank you.        -0.142857            Negative
      Poor communication and management issues. ðŸ˜ž        -0.333333            Negative
 Great learning experience and supportive mentor.         0.500000            Positive
   The team was very helpful and I learned a lot.         0.200000            Positive
Very satisfied with management and communication.         0.166667            Positive
   The team was very helpful and I learned a lot.         0.200000            Positive
                  Training sessions were average.         0.000000             Neutral
                      Nothing major to highlight.         0.000000             Neutral
 Great learning experience and supportive mentor.         0.500000            Positive


In [24]:
#Step 5: Show Sentiment Counts

In [26]:
# Number of feedback entries per predicted label, most frequent first.
sentiment_counts = df["Predicted_Sentiment"].value_counts()
print("\n Sentiment Count Summary:")
print(sentiment_counts)



 Sentiment Count Summary:
Predicted_Sentiment
Positive    1499
Neutral      554
Negative     447
Name: count, dtype: int64


In [28]:
#Step 6: Confusion Matrix + Accuracy

In [30]:
# Rows are the labels shipped with the dataset, columns are the lexicon
# predictions; each cell counts the feedback entries with that combination.
conf_matrix = pd.crosstab(index=df["True_Label"], columns=df["Predicted_Sentiment"])

# Share of rows where the prediction equals the dataset label.
accuracy = (df["True_Label"] == df["Predicted_Sentiment"]).mean()

print("\n Confusion Matrix (True vs Predicted):")
print(conf_matrix)

print(f"\n Model Accuracy: {accuracy:.2%}")



 Confusion Matrix (True vs Predicted):
Predicted_Sentiment  Negative  Neutral  Positive
True_Label                                      
Negative                  344       55        15
Neutral                    83      461       167
Positive                   20       38      1317

 Model Accuracy: 84.88%


In [32]:
#Step 7: Save Final Output

In [34]:
# Write the DataFrame, including the columns added in this notebook, to a new
# CSV in the working directory; the row index is left out of the file.
df.to_csv(path_or_buf="internship_feedback_with_sentiment.csv", index=False)
print("\n File saved: internship_feedback_with_sentiment.csv")



 File saved: internship_feedback_with_sentiment.csv


## Accuracy achieved on this dataset: 84.88%