# Analyzing Student Feedback Using Artificial Intelligence techniques in Python


### Install Library Packages


In [None]:
!pip install pandas vaderSentiment googletrans==4.0.0-rc1 nltk scikit-learn langdetect gensim

### Import Library Packages

In [None]:
import pandas as pd
import re
import string

from langdetect import detect
from googletrans import Translator

import nltk
# Download the NLTK words corpus if not already downloaded
nltk.download('words')
# Download the tokenizer model and the stop-word list used when preprocessing the feedback data
nltk.download('punkt')
nltk.download('stopwords')

from nltk.corpus import words
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

from gensim import corpora, models


from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

import spacy
# Load the spaCy model (you may need to download it first)
nlp = spacy.load("en_core_web_sm")



### Define Global functions

In [None]:
def read_excel_file(path):
  """Load an Excel workbook into a pandas DataFrame.

  A short preview of the data is printed on success. When the file is
  missing or cannot be read, a message is printed instead and the caller
  receives None.
  """
  try:
    frame = pd.read_excel(path)

    # Show the first few rows as a quick sanity check of the import
    print("Data imported successfully:")
    print(frame.head())
    return frame
  except FileNotFoundError:
    print(f"File not found at path: {path}")
  except Exception as error:
    print(f"An error occurred: {error!s}")

  # Only reached when one of the handlers above ran
  return None


def preprocess_text(text):
  """Lower-case, tokenize and filter a piece of text for topic modeling.

  Args:
    text: The raw text to process.

  Returns:
    A list of lower-cased tokens that are purely alphabetic and are not
    English stop words. An empty list is returned when `text` is not a
    string or when tokenization fails, so the result can always be handed
    to code that expects a list of tokens.
  """
  if not isinstance(text, str):
    return []

  try:
    stop_words = set(stopwords.words('english'))
    tokens = word_tokenize(text.lower())
  except Exception as error:
    # Best effort: one bad row must not stop the whole run, but say why it
    # was dropped instead of failing silently.
    print(f"Could not tokenize text: {error}")
    return []

  return [token for token in tokens if token.isalpha() and token not in stop_words]


def clean_text_questions(text):
  """Strip every `CLO:` marker, with its optional `-<number>` suffix, from text."""
  clo_marker = re.compile(r'CLO\s*:\s*(-\s*\d+)?')
  return clo_marker.sub('', text)


def detect_and_translate(text):
  """Translate Arabic text to English and leave everything else untouched.

  Args:
    text: The value to inspect, normally a string.

  Returns:
    The English translation when the detected language is Arabic;
    otherwise `text` itself. Values that are not strings, blank strings,
    and strings whose language cannot be detected are returned unchanged.
  """
  # There is nothing to detect in a non-string or blank value
  if not isinstance(text, str) or not text.strip():
    return text

  from langdetect.lang_detect_exception import LangDetectException

  try:
    detected_language = detect(text)
  except LangDetectException:
    # Raised when the text offers no usable features (for example only
    # digits or punctuation); keep the value as it is.
    return text

  if detected_language != 'ar':
    return text

  translator = Translator()
  # NOTE(review): the unusual spelling is kept on purpose; googletrans
  # 4.0.0-rc1 appears to read `raise_Exception` internally - confirm before
  # renaming it.
  translator.raise_Exception = True
  translated = translator.translate(text, src='ar', dest='en')
  return translated.text


# Lazily built cache of the NLTK English word list (see _english_word_set)
_ENGLISH_WORD_SET = None


def _english_word_set():
  """Return the NLTK English word list as a set, building it only once."""
  global _ENGLISH_WORD_SET
  if _ENGLISH_WORD_SET is None:
    _ENGLISH_WORD_SET = set(words.words())
  return _ENGLISH_WORD_SET


def keep_real_english_words(text):
  """Keep only the tokens of `text` that appear in the NLTK English word list.

  Args:
    text: The text to filter.

  Returns:
    The surviving tokens joined by single spaces (original casing kept;
    the membership test uses the lower-cased token). None is returned when
    `text` is not a string or when it cannot be processed.
  """
  if not isinstance(text, str):
    return None

  try:
    # The word list is large, so it is built once and reused across calls
    vocabulary = _english_word_set()
    tokens = word_tokenize(text)
  except Exception as error:
    # Best effort: report the problem and mark the row as unusable
    print(f"Could not filter English words: {error}")
    return None

  return ' '.join(token for token in tokens if token.lower() in vocabulary)


# Lazily created analyzer shared by every call to sentiment_VADER
_VADER_ANALYZER = None


def sentiment_VADER(text):
  """Label text as Positive, Negative or Neutral using VADER.

  Args:
    text: The text to score.

  Returns:
    "Positive" when the compound score is >= 0.05, "Negative" when it is
    <= -0.05 and "Neutral" otherwise. None is returned when `text` is not
    a string or when scoring fails.
  """
  global _VADER_ANALYZER

  if not isinstance(text, str):
    return None

  # Build the analyzer on first use only instead of once per row
  if _VADER_ANALYZER is None:
    _VADER_ANALYZER = SentimentIntensityAnalyzer()

  try:
    compound_score = _VADER_ANALYZER.polarity_scores(text)['compound']
  except Exception as error:
    # Best effort: report the problem and leave this row without a label
    print(f"Could not score sentiment with VADER: {error}")
    return None

  if compound_score >= 0.05:
    return "Positive"
  if compound_score <= -0.05:
    return "Negative"
  return "Neutral"


def sentiment_spaCy(text):
  """Label text from the `sentiment` attribute of its spaCy Doc.

  NOTE(review): `Doc.sentiment` is only meaningful when a pipeline
  component sets it. The `en_core_web_sm` pipeline loaded by this file does
  not appear to include such a component, in which case the score keeps
  its default value and every text is labelled "Neutral". Confirm, then
  either add a sentiment component or drop this label.

  Args:
    text: The text to score.

  Returns:
    "Positive" when the score is >= 0.2, "Negative" when it is <= -0.2 and
    "Neutral" otherwise. None is returned when `text` is not a string or
    when the pipeline fails on it.
  """
  if not isinstance(text, str):
    return None

  try:
    sentiment_score = nlp(text).sentiment
  except Exception as error:
    # Best effort: report the problem and leave this row without a label
    print(f"Could not score sentiment with spaCy: {error}")
    return None

  if sentiment_score >= 0.2:
    return "Positive"
  if sentiment_score <= -0.2:
    return "Negative"
  return "Neutral"

## Import Datasets

In [None]:
# Load the two task workbooks. read_excel_file prints a preview of each
# file and yields None (after printing a message) when a file cannot be read.
comments = read_excel_file('AI_Engineer_Dataset_Task_1.xlsx')
cources = read_excel_file('AI_Engineer_Dataset_Task_2.xlsx')

### Preprocessing the text

Fill null values with 'NOS' -> No String

In [None]:
# Replace missing questions and responses with the placeholder 'NOS' so the
# text-cleaning steps that follow do not receive NaN values.
comments[['QuestionText', 'ParticipantResponse']] = comments[['QuestionText', 'ParticipantResponse']].fillna('NOS')

Remove the special `CLO:` pattern from the question text

In [None]:
# Strip the CLO marker from every question text
comments['QuestionText'] = comments['QuestionText'].apply(clean_text_questions)

Translate Arabic text to English (text detected as any other language is left unchanged)

In [None]:
# Translate each distinct value once and map the result back onto the rows.
# Identical texts (presumably the same questions repeated for every
# participant) would otherwise each trigger their own language detection
# and, for Arabic, their own request to the translation service.
for text_column in ('QuestionText', 'ParticipantResponse'):
    translations = {value: detect_and_translate(value) for value in comments[text_column].unique()}
    comments[text_column] = comments[text_column].map(translations)


Keep only meaningful English words

In [None]:
# Drop every token that is not in the NLTK English word list; a response
# that cannot be processed becomes None.
comments['ParticipantResponse'] = comments['ParticipantResponse'].apply(keep_real_english_words)

## Sentiment Analysis



In [None]:
# Add one sentiment label column per analyzer (VADER and spaCy), computed
# from the cleaned participant responses
comments['ResponseSentiment_VADER'] = comments['ParticipantResponse'].apply(sentiment_VADER)
comments['ResponseSentiment_spaCy'] = comments['ParticipantResponse'].apply(sentiment_spaCy)


#### Topic Modeling

In [None]:
# Feedback data: one entry per row, including empty / None responses
feedbacks = comments['ParticipantResponse'].tolist()

# Tokenize every non-empty feedback and remember its position in `feedbacks`,
# so that each dominant topic is later reported against the right row.
# `or []` keeps the corpus valid if preprocessing yields nothing for a row.
feedback_positions = []
tokenized_feedbacks = []
for position, feedback in enumerate(feedbacks):
    if not feedback:
        continue  # Skip null or empty feedback
    feedback_positions.append(position)
    tokenized_feedbacks.append(preprocess_text(feedback) or [])

# Create a dictionary and corpus for LDA
dictionary = corpora.Dictionary(tokenized_feedbacks)
corpus = [dictionary.doc2bow(text) for text in tokenized_feedbacks]

# Apply LDA model
lda_model = models.LdaModel(corpus, num_topics=10, id2word=dictionary, passes=15)

# Print the identified topics and their keywords
topics = lda_model.print_topics(num_words=5)
for topic in topics:
    print(f"Topic {topic[0]}: {topic[1]}")

# Get the dominant topic for each feedback. `corpus` and `feedback_positions`
# were built together, so zipping them pairs every bag-of-words with the
# row it came from.
for i, bow in zip(feedback_positions, corpus):
    dominant_topic = max(lda_model[bow], key=lambda x: x[1])
    print(f"Feedback {i + 1}: Dominant Topic - {dominant_topic[0]}, Probability - {dominant_topic[1]:.4f}")
