# Importing Libraries. 
All necessary imports are performed here.

In [1]:
# import model related libraries
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer

# import huggingface login
from huggingface_hub import login

# import the accesstoken of huggingface, which is saved in .env file
# from dotenv import load_dotenv # it was not running on kaggle so i have commented it
import os

# import time to compute how long it took the model to run
import time

# import date-time to display the date and time of last run
from datetime import datetime

# import text wrap to make sure its fully displayable
import textwrap

# import torch as we want to set its datatype as this is a larger model
import torch

# Initialize Inputs
Here we initialize all the given inputs, such as the input text, the prompt, and the model name.

### Input Text
here is the email we will be passing for all models: this is given in the question and stays constant

In [2]:
# The email that every task below operates on. It is kept verbatim (including
# trailing spaces and curly apostrophes) so each run sees exactly the same input.
input_text = """Subject: Concerns About Professor X’s Conduct 
 
Dear Dr. Ustaad, 
I hope this email finds you well. I am writing to express my concerns about Professor X’s conduct during the Introduction to Zoology class last semester. On multiple occasions, Professor X made dismissive remarks about students’ questions and failed to provide clear feedback on assignments.  
 
Additionally, the grading seemed inconsistent and unfair, with no opportunity for clarification or appeal. 
 
I found this experience deeply frustrating and demotivating, and I believe it affected my performance in the course. I would appreciate it if the department could look into this matter and ensure that future students have a more positive and supportive learning environment. 
 
Thank you for your attention to this matter. 
Sincerely, 
Shaagird
"""

# Echo the input so the notebook output records what was sent to the model.
# print() separates its two arguments with a space, so the first echoed line
# appears indented by one character.
print("Input Text:\n", input_text)

Input Text:
 Subject: Concerns About Professor X’s Conduct 
 
Dear Dr. Ustaad, 
I hope this email finds you well. I am writing to express my concerns about Professor X’s conduct during the Introduction to Zoology class last semester. On multiple occasions, Professor X made dismissive remarks about students’ questions and failed to provide clear feedback on assignments.  
 
Additionally, the grading seemed inconsistent and unfair, with no opportunity for clarification or appeal. 
 
I found this experience deeply frustrating and demotivating, and I believe it affected my performance in the course. I would appreciate it if the department could look into this matter and ensure that future students have a more positive and supportive learning environment. 
 
Thank you for your attention to this matter. 
Sincerely, 
Shaagird



### Model Params
Here we initialize the model name and its parameters, and then the text generator we will be using.

In [3]:
# Checkpoint to load from the Hugging Face Hub.
model_name = "microsoft/phi-4"

# Load the causal-LM weights.
#   device_map="auto"  -> let the library decide where to place the weights
#   torch_dtype="auto" -> keep the dtype stored in the checkpoint
# trust_remote_code is intentionally NOT enabled: it permits code shipped in
# the model repository to run locally, and this checkpoint loads without it
# (no custom modeling/configuration files are fetched during the download).
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype="auto",
)

# Tokenizer matching the checkpoint above.
tokenizer = AutoTokenizer.from_pretrained(model_name)

config.json:   0%|          | 0.00/820 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/20.4k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/6 [00:00<?, ?it/s]

model-00001-of-00006.safetensors:   0%|          | 0.00/4.93G [00:00<?, ?B/s]

model-00002-of-00006.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00003-of-00006.safetensors:   0%|          | 0.00/4.90G [00:00<?, ?B/s]

model-00004-of-00006.safetensors:   0%|          | 0.00/4.77G [00:00<?, ?B/s]

model-00005-of-00006.safetensors:   0%|          | 0.00/4.77G [00:00<?, ?B/s]

model-00006-of-00006.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/156 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/17.7k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.61M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/917k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/4.25M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/2.50k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

In [4]:
# Default generation settings shared by every call made through `generator`;
# individual calls may still override them (e.g. a smaller max_new_tokens).
generation_defaults = {
    "return_full_text": False,  # hand back the completion without the prompt
    "max_new_tokens": 5000,     # upper bound used when a call does not set its own
    "do_sample": False,         # sampling disabled
}

# Text-generation pipeline wrapping the already-loaded model and tokenizer.
generator = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    **generation_defaults,
)

Device set to use cuda:0


# Login, Authentication
here we authenticate the hugging face access token

In [5]:
# Read the Hugging Face access token from the environment so that it never has
# to be pasted into (and accidentally committed with) the notebook.
# HUGGINGFACE_API_TOKEN is the name this project used in its .env file;
# HF_TOKEN is accepted as a fallback. When neither is set the token is an
# empty string, exactly like the previous placeholder.
# (python-dotenv's load_dotenv() is not used because it did not run on Kaggle.)
api_token = os.getenv("HUGGINGFACE_API_TOKEN") or os.getenv("HF_TOKEN") or ""

In [6]:
# Authenticate only when a token was actually supplied; calling login() with
# the empty placeholder cannot authenticate anything. The checkpoint used in
# this notebook is downloaded before this cell runs, so skipping the login
# does not block the rest of the notebook.
if api_token:
    login(api_token)
else:
    print("No Hugging Face token provided; skipping login.")

# Model Running
Here we run the model. Before that, we display the model details, and we also compute the time taken to run the model.

### Model Details
here we display how many parameters and how much memory the model took

In [7]:
# Data type of the loaded weights
print(model.dtype)

# Parameters Computation
total_params = sum(p.numel() for p in model.parameters())
print(f"Model Total Parameters: {total_params / 1e9:.2f} billion")

# Memory used Computation (in MBs)
# Use every parameter's real element size instead of assuming 2 bytes each:
# the dtype is chosen by torch_dtype="auto", so it is not guaranteed to be a
# 16-bit type. For 2-byte weights the result is unchanged.
memory = sum(p.numel() * p.element_size() for p in model.parameters()) / (1024 ** 2)
print(f"Estimate Memory Footprint: {memory:.2f} MB\n")

torch.bfloat16
Model Total Parameters: 14.66 billion
Estimate Memory Footprint: 27960.79 MB



## Model Running
Here, for every task, we first save the start_time and the end_time and then compute the time taken.

In [8]:
def compute_time(start_time, end_time):
    """Format the time elapsed between two timestamps given in seconds.

    In this notebook it reports how long the model took to perform a task.

    Args:
        start_time (float): timestamp, in seconds, taken before the work started
        end_time (float): timestamp, in seconds, taken after the work finished

    Returns:
        str: "Time taken: <h>h <m>m <s>s <ms>ms". If end_time is earlier than
        start_time, the magnitude is formatted and prefixed with "-".
    """
    # Round once to whole milliseconds and continue with integers. Truncating
    # the float fraction instead would under-report by 1 ms whenever the
    # fraction is stored slightly below its decimal value (e.g. 2.3 s -> 299ms).
    total_ms = round((end_time - start_time) * 1000)

    # Format the magnitude and carry the sign separately; divmod on a negative
    # value would otherwise produce misleading "-1h 59m ..." style output.
    sign = "-" if total_ms < 0 else ""
    total_seconds, milliseconds = divmod(abs(total_ms), 1000)
    hours, rem = divmod(total_seconds, 3600)
    minutes, seconds = divmod(rem, 60)

    return f"Time taken: {sign}{hours}h {minutes}m {seconds}s {milliseconds}ms"

### Prompt Making
here we define the prompt we will use for all tasks of summarization, question answering, keyword extraction and translation

In [9]:
# Single combined prompt that asks for all four tasks at once (summary, question
# answering, key details, French translation), with the email embedded between
# triple backticks.
# NOTE(review): the execution cell visible in this notebook builds its own
# per-task prompts and does not reference `prompt` - confirm whether it is
# still needed.
prompt = f"""
Perform the following actions: 
1 - Summarize the following text delimited by triple backticks.
2 - Answer “What was the reason for the student's disappointment with Professor X? ” based on the email content.
3 - Identify key details such as incidents, concerns, and requested actions. 
4 - Translate the following text into French.

Separate your answers with line breaks.
Text:
```{input_text}```
"""

### Model Execution
here we run the model by passing the prompt to a text_generator

In [11]:
# start the timer
# perf_counter() is a monotonic clock meant for measuring durations, so the
# reported time cannot be skewed by system clock adjustments during the run
start_time = time.perf_counter()

# 1. Summarization
summarization_prompt = f"summarize the following text:\n\n{input_text}\n\nSummary:"
summarization_response = generator(summarization_prompt, max_new_tokens=150, num_return_sequences=1)
# keep only what follows the last "Summary:" marker, in case the model repeats it
summarized_text = summarization_response[0]['generated_text'].split("Summary:")[-1].strip()

# 2. Question Answering
question = "What was the reason for the student's disappointment with Professor X?"
question_prompt = f"Context: {input_text}\n\nQuestion: {question}\n\nAnswer:"
answer_response = generator(question_prompt, max_new_tokens=200, num_return_sequences=1)
answer_extracted = answer_response[0]['generated_text']

# 3. Keyword Extraction
keyword_prompt = f"Extract important keywords from the following text:\n\n{input_text}\n\nKeywords:"
keyword_response = generator(keyword_prompt, max_new_tokens=350, num_return_sequences=1)
# keep only what follows the last "Keywords:" marker, in case the model repeats it
keyword_extracted = keyword_response[0]['generated_text'].split("Keywords:")[-1].strip()

# 4. Translation to French and Back to English
# The prompt asks for the French translation and then for a back-translation,
# and ends on the "French Text:" marker for the model to continue from
translation_prompt = f"Translate the following English text into French:\n\n{input_text}\n\nFrench Translation:\n\nNow, translate the French text back into English:\n\nFrench Text:"
translation_response = generator(translation_prompt, max_new_tokens=375, num_return_sequences=1)
# keep only what follows the last marker the model may have echoed
translated_text = translation_response[0]['generated_text'].split("French Translation:")[-1].split("French Text:")[-1].strip()

# end the timer
end_time = time.perf_counter()

# display time taken with output
print(compute_time(start_time, end_time))
print("\nSummarized Text:", summarized_text)
print("\nAnswer to the question:", answer_extracted)
print("\nExtracted Keywords:", keyword_extracted)
print("\nTranslated Text:", translated_text)

Time taken: 0h 1m 22s 791ms

Summarized Text: The student, Shaagird, is expressing concerns about Professor X’s conduct in the Introduction to Zoology class. Shaagird mentions that Professor X made dismissive remarks, failed to provide clear feedback, and had inconsistent grading. Shaagird found the experience frustrating and demotivating, affecting their performance. They request the department to investigate and improve the learning environment for future students.


## response

The student, Shaagird, is writing to Dr. Ustaad to express concerns about Professor X's conduct in the Introduction to Zoology class. Shaagird reports that Professor X made dismissive remarks, failed to provide clear feedback on assignments, and had inconsistent and unfair grading. This experience was frustrating and demotivating for Shaag

Answer to the question:  The student was disappointed with Professor X because of dismissive remarks about students’ questions, lack of clear feedback on assignments, inc

In [18]:
# Prompt asking the model to translate the French email back into English.
# The French text (including its leading "Translated Text:" label) is pasted in
# as a literal; it is not read from the `translated_text` variable.
# NOTE(review): re-paste this text if the French translation changes.
prompt_back_to_eng = """
Translate the following text to English:
Translated Text: Objet: Préoccupations concernant le comportement du Professeur X 

Cher Dr. Ustaad, 
J'espère que ce courriel vous trouve en bonne santé. Je vous écris pour exprimer mes préoccupations concernant le comportement du Professeur X lors du cours d'introduction à la zoologie l'année dernière. À plusieurs reprises, le Professeur X a fait des remarques dédaigneuses sur les questions des étudiants et a échoué à fournir des commentaires clairs sur les devoirs. 

De plus, la notation semblait incohérente et injuste, sans possibilité de clarification ou d'appel. 

J'ai trouvé cette expérience profondément frustrante et démoralisante, et je crois qu'elle a affecté ma performance dans le cours. J'apprécierais que le département examine cette affaire et assure que les futurs étudiants aient un environnement d'apprentissage plus positif et soutenant. 

Merci de votre attention à cette affaire. 
Cordialement, 
Shaagird
"""

In [19]:
# start the timer
# perf_counter() is a monotonic clock meant for measuring durations, so the
# reported time cannot be skewed by system clock adjustments during the run
start_time = time.perf_counter()

# generate the output
# the prompt is sent as a single user turn in chat-message form
messages = [
    {"role": "user", "content": prompt_back_to_eng}
]
output = generator(messages)

# end the timer
end_time = time.perf_counter()

# display time taken with output
print(compute_time(start_time, end_time))
print("Output:\n", output[0]["generated_text"])

Time taken: 0h 0m 23s 863ms
Output:
 **Subject: Concerns Regarding Professor X's Behavior**

Dear Dr. Ustaad,

I hope this email finds you in good health. I am writing to express my concerns about Professor X's behavior during last year's introductory zoology course. On several occasions, Professor X made dismissive remarks about students' questions and failed to provide clear feedback on assignments.

Additionally, the grading seemed inconsistent and unfair, with no opportunity for clarification or appeal.

I found this experience deeply frustrating and demoralizing, and I believe it affected my performance in the course. I would appreciate it if the department could examine this matter and ensure that future students have a more positive and supportive learning environment.

Thank you for your attention to this matter.

Sincerely,  
Shaagird


# End
here we display the last time this jupyter notebook was run to always remember it

In [20]:
# Stamp the notebook with the local date and time of this run.
print(f"{datetime.now():%Y-%m-%d %H:%M:%S}")

2025-02-02 17:42:05
