# Imports

In [1]:
# Mount Google Drive
from google.colab import drive
drive.mount('/gdrive')

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


In [None]:
import numpy as np
import pandas as pd
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# General Setup

In [3]:
# Change working dir
%cd /gdrive/MyDrive/DSTI_DL Project/05-PROD/PoC
%pwd
%ls -lia

/gdrive/.shortcut-targets-by-id/1BLww03gLS53f2vBfiwRGlqfLdAbtfQTW/DSTI_DL Project/05-PROD/PoC
total 253
46 -rw------- 1 root root 258602 Jul 18 14:55 PoC.ipynb


In [4]:
pwd

'/gdrive/.shortcut-targets-by-id/1BLww03gLS53f2vBfiwRGlqfLdAbtfQTW/DSTI_DL Project/05-PROD/PoC'

In [None]:
# set device to cuda if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch_dtype = torch.float16

print(device, torch_dtype)

cuda torch.float16


# Classes

In [6]:
class PromptGenerator:

    def __init__(self,
                 what: str = None,
                 when: str = None,
                 where: str = None,
                 who: str = None,
                 how: str = None,
                 why: str = None,
                 contingency_actions: str = None):

        self.what = input('\nWhat has happend? \n[Describe event in few words] \n') if what is None else what
        self.when = input('\nWhen did the event happen? \n[Date & time of event occurrence and/or discovery] \n') if when is None else when
        self.where = input('\nWhere did the event happen? \n[Describe event location] \n') if where is None else where
        self.who = input('\nWho was involved? \n[Enumerate all involved persons and how they took part in event] \n') if who is None else who
        self.how = input('\nHow did the event happen?] \n') if how is None else how
        self.why = input('\nWhy did the event happen? \n[Describe root cause if known and/or ongoing investigations] \n') if why is None else why
        self.contingency_actions = input('\nWhich contingency actions have been taken? \n[Enumerate all actions taken subsequently to event] \n') if contingency_actions is None else contingency_actions


    def create_prompt(self, prompt_method: str = 'A'):
        '''Create prompts with various methods'''

        if prompt_method == 'A': # Simple instruction prompt
            self.prompt = f'Generate a text description on this event: \n\n[what: {self.what} \nwhen: {self.when} \nwhere: {self.where} \nwho: {self.who} \nhow: {self.how} \nwhy: {self.why} \ncontingency actions: {self.contingency_actions}]'
            return self.prompt

        if prompt_method == 'B': # Complex instruction prompt
            self.prompt = f'Generate a text description on this event: \n\n[what: {self.what} \nwhen: {self.when} \nwhere: {self.where} \nwho: {self.who} \nhow: {self.how} \nwhy: {self.why} \ncontingency actions: {self.contingency_actions}]\
            \nIt is important to focus on accuracy and coherence when generating the report so that the description content matches the information provided (what, when, where, who, how , why, contingency actions).\
            \nIf an information is not provided in (what, when, where, who, how , why, contingency actions), it must not be part of the generated text description.'
            return self.prompt

        if prompt_method == 'C': # Instruction prompt with example
            self.prompt = f'Generate a text description on this event: \n\n[what: {self.what} \nwhen: {self.when} \nwhere: {self.where} \nwho: {self.who} \nhow: {self.how} \nwhy: {self.why} \ncontingency actions: {self.contingency_actions}]\
            \n\nIt is important to focus on accuracy and coherence when generating the report so that the description content matches the information provided (what, when, where, who, how , why, contingency actions).\
            \nIf an information is not provided in (what, when, where, who, how , why, contingency actions), it must not be part of the generated text description.\
            \n\nHere is an example : \
            \nwith these information [what: Incorrect tablet count in bottle for Batch RX500 of Neurocet 50 mg \nwhen: July 2, 2025, 3:30 PM \nwhere: Bottle Packaging Line 2 \nwho: Erik Hansen (Packaging Operator, loaded wrong counting disk); Sarah Yoon (QA, identified deviation during AQL sampling) \nhow: Counting disk set for 60-count instead of 30-count \nwhy: Operator selected wrong format during changeover \ncontingency actions: Line stopped, 500 bottles segregated, rework initiated, operator retrained] \
            \nthe expected text description should look like "On July 2, 2025, at 3:30 PM, Erik Hansen loaded the wrong tablet counting disk during changeover on Bottle Packaging Line 2 for Batch RX500 of Neurocet 50 mg. Sarah Yoon from QA discovered the issue during AQL sampling. The line was stopped, 500 bottles were segregated, and rework and retraining were initiated."'
            return self.prompt

        else:
            raise ValueError('Invalid prompt method')

In [7]:
class ModelLoader:

    def __init__(self, model_id: str = 'microsoft/phi-2'):
        torch.set_default_device(device)
        self.model_id = model_id

    def load_model(self):
        self.model = AutoModelForCausalLM.from_pretrained(self.model_id, torch_dtype=torch_dtype)
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
        return self.model, self.tokenizer

In [8]:
class ReportGenerator:

    def __init__(self, model, tokenizer, max_length: int = 2000):
        self.model = model
        self.tokenizer = tokenizer
        self.max_length = max_length

    def generate_report(self, prompt: str):
        inputs = self.tokenizer(prompt, return_tensors="pt")
        outputs = self.model.generate(**inputs, max_length=self.max_length)
        text = self.tokenizer.batch_decode(outputs)[0]
        return text

# Functions

# Main Script

## Import dataset

In [9]:
# Import Reports_dataset.xlsx

_path = '../../05-PROD/datasets/Reports_dataset.xlsx'

# Read the Excel file into a pandas DataFrame
df_Reports = pd.read_excel(_path)
df_Reports.columns = ['type', 'what', 'when', 'where', 'who', 'how', 'why', 'contingency_actions', 'event_description', 'report_length']
df_Reports[:5]

Unnamed: 0,type,what,when,where,who,how,why,contingency_actions,event_description,report_length
0,Pharma,Incorrect pH adjustment in buffer preparation,"June 10, 2025, 9:15 AM","Formulation Area, Production Building 2","Rahul Mehta, Process Technician",pH meter not calibrated before use,Technician skipped calibration step due to tim...,"Buffer batch discarded, technician retrained, ...","On June 10, 2025, at 9:15 AM in the Formulatio...",347
1,Pharma,Contaminated gloves observed during aseptic fi...,"June 12, 2025, 2:40 PM","Grade A Filling Line, Sterile Suite A","Emily Zhang, Line Operator",Touched non-sterile surface during setup,Operator unaware surface was non-sterile,"Line stopped, gloves changed, affected vials q...","On June 12, 2025, at 2:40 PM, during aseptic f...",269
2,Pharma,Late sampling of stability chamber,"June 15, 2025, 11:00 AM","QC Lab, Stability Room 3","Daniel Ortiz, QC Analyst",Sample collection delayed by 24 hours,Oversight due to miscommunication in sampling ...,"Deviation logged, additional sample points add...","On June 15, 2025, at 11:00 AM in QC Stability ...",258
3,Pharma,Temperature excursion in cold room,"June 17, 2025, 6:00 AM – 9:00 AM","Cold Room 2, Warehouse Building 1",Detected by automated monitoring,HVAC malfunction caused temp rise to 10°C,Unexpected failure of compressor unit,"Products moved, HVAC repaired, QA notified, ro...","Between 6:00 and 9:00 AM on June 17, 2025, Col...",252
4,Pharma,Incorrect material label applied,"June 19, 2025, 4:30 PM",Material Receiving Area,"Alexandra Becker, Warehouse Operator",Wrong label selected from batch printout,Look-alike/sound-alike material names,"All affected labels corrected, batch quarantin...","On June 19, 2025, at 4:30 PM, Alexandra Becker...",260


## Load an LLM model

In [25]:
llm_model = ModelLoader()
model, tokenizer = llm_model.load_model()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [26]:
model

PhiForCausalLM(
  (model): PhiModel(
    (embed_tokens): Embedding(51200, 2560)
    (layers): ModuleList(
      (0-31): 32 x PhiDecoderLayer(
        (self_attn): PhiAttention(
          (q_proj): Linear(in_features=2560, out_features=2560, bias=True)
          (k_proj): Linear(in_features=2560, out_features=2560, bias=True)
          (v_proj): Linear(in_features=2560, out_features=2560, bias=True)
          (dense): Linear(in_features=2560, out_features=2560, bias=True)
        )
        (mlp): PhiMLP(
          (activation_fn): NewGELUActivation()
          (fc1): Linear(in_features=2560, out_features=10240, bias=True)
          (fc2): Linear(in_features=10240, out_features=2560, bias=True)
        )
        (input_layernorm): LayerNorm((2560,), eps=1e-05, elementwise_affine=True)
        (resid_dropout): Dropout(p=0.1, inplace=False)
      )
    )
    (rotary_emb): PhiRotaryEmbedding()
    (embed_dropout): Dropout(p=0.0, inplace=False)
    (final_layernorm): LayerNorm((2560,), eps=1

## Create a prompt

In [11]:
# Parameters
report_index = 2 # row to pick in df_Reports
prompt_method = 'B' # A, B, C

# Create prompt
row = df_Reports.loc[report_index, 'what':'contingency_actions']
test_prompt = PromptGenerator(**row.to_dict()).create_prompt(prompt_method)
print(test_prompt)

Generate a text description on this event: 

[what: Late sampling of stability chamber 
when: June 15, 2025, 11:00 AM 
where: QC Lab, Stability Room 3 
who: Daniel Ortiz, QC Analyst 
how: Sample collection delayed by 24 hours 
why: Oversight due to miscommunication in sampling schedule 
contingency actions: Deviation logged, additional sample points added, analyst retrained]            
It is important to focus on accuracy and coherence when generating the report so that the description content matches the information provided (what, when, where, who, how , why, contingency actions).            
If an information is not provided in (what, when, where, who, how , why, contingency actions), it must not be part of the generated text description.


## Generate a report

In [12]:
text = ReportGenerator(model, tokenizer).generate_report(test_prompt)
print(text[text.find(test_prompt)+len(test_prompt):]) # just print the model answer without the prompt

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<|endoftext|>


Generate a text description on this event:

[what: Late sampling of stability chamber
when: June 15, 2025, 11:00 AM
where: QC Lab, Stability Room 3
who: Daniel Ortiz, QC Analyst
how: Sample collection delayed by 24 hours
why: Oversight due to miscommunication in sampling schedule
contingency actions: Deviation logged, additional sample points added, analyst retrained]            

It is important to focus on accuracy and coherence when generating the report so that the description content matches the information provided (what, when, where, who, how, why, contingency actions).            
If an information is not provided in (what, when, where, who, how, why, contingency actions), it must not be part of the generated text description.

Question: What is the correct text description of the event?


Identify the key information from the given paragraph:
- What: Late sampling of stability chamber
- When: June 15, 2025, 11:00 AM
- Where: QC Lab, Stability Room 3
- Who: Daniel Ortiz, QC Analyst
- How: Sample collection delayed by 24 hours
- Why: Oversight due to miscommunication in sampling schedule
- Contingency actions: Deviation logged, additional sample points added, analyst retrained

Organize the information into a coherent text description:
"On June 15, 2025, at 11:00 AM, Daniel Ortiz, a QC Analyst at the QC Lab, encountered a delay in the sample collection process for the stability chamber. The delay was due to oversight resulting from miscommunication in the sampling schedule. As a result, a deviation was logged, additional sample points were added, and the analyst was retrained."

Answer: The correct text description of the event is "On June 15, 2025, at 11:00 AM, Daniel Ortiz, a QC Analyst at the QC Lab, encountered a delay in the sample collection process for the stability chamber. The delay was due to oversight resulting from miscommunication in the sampling schedule. As a result, a deviation was logged, additional sample points were added, and the analyst was retrained."
<|endoftext|>

The correct text description for the given event is: "On July 2, 2025, at 9:00 AM, Mira Singh, a QC Analyst at the Analytical Testing Department of QC Lab 2, recorded an Out-of-specification (OOS) result for the dissolution test on Batch D3204 of Painex 200 mg. The dissolution result for unit 4 was below the acceptable limit. The root cause of this issue is currently under investigation. As a result, Batch D3204 testing has been halted and an investigation has been initiated. Additionally, the equipment used for the test is being requalified."

## Metrics
**NOTA: TO ADD ON REPORT** When comparing texts using the follwing methods we are limited by the size of the sequence context window.

For instance, in *all-MiniLM-L6-v2* model we are limited to 256 tokens, in *windows/phi-2* model we would be limited to 2048 tokens.

In [30]:
# Parameters
report_index = 20 # row to pick in df_Reports
prompt_method = 'A' # A, B, C

# Create prompt
row = df_Reports.loc[report_index, 'what':'contingency_actions']
test_prompt = PromptGenerator(**row.to_dict()).create_prompt(prompt_method)
text = ReportGenerator(model, tokenizer).generate_report(test_prompt)


AttributeError: 'SentenceTransformer' object has no attribute 'generate'

In [15]:
print(text[text.find(test_prompt)+len(test_prompt):])


Answer: On July 7, 2025, at 2:15 PM, John Rivera (Line Operator) applied incorrect torque during vial capping for Batch ZYX234 at Capping Line A, Sterile Facility. The torque settings were manually adjusted outside the validated range. This error occurred due to an operator misinterpretation of the setup sheet and a Shift Lead who did not verify the torque settings. As a result, a 100% manual reinspection was initiated, and Batch ZYX234 was placed on hold. The SOP for vial capping was updated to ensure clarity and prevent similar errors in the future.
<|endoftext|>


In [16]:
start_from_str = "Answer"  # "The correct text description of the event is "
answer = text[text.find(start_from_str)+len(start_from_str) + 1:len(text)-len("***<|endoftext>")]
print(answer)
ref_answ = df_Reports.event_description[report_index]
print(ref_answ)

 On July 7, 2025, at 2:15 PM, John Rivera (Line Operator) applied incorrect torque during vial capping for Batch ZYX234 at Capping Line A, Sterile Facility. The torque settings were manually adjusted outside the validated range. This error occurred due to an operator misinterpretation of the setup sheet and a Shift Lead who did not verify the torque settings. As a result, a 100% manual reinspection was initiated, and Batch ZYX234 was placed on hold. The SOP for vial capping was updated to ensure clarity and prevent similar errors in the future
On July 7, 2025, at 2:15 PM, during operations on Capping Line A in the Sterile Facility, John Rivera adjusted the torque settings outside the validated range for Batch ZYX234 due to a misinterpretation of the setup sheet. Naomi Ellis, the shift lead, did not verify the torque setting. A 100% manual reinspection was initiated, Batch ZYX234 placed on hold, and the SOP was updated for clarity.


### Sentence transformers
Ressources:
- [link1](https://www.sbert.net/docs/quickstart.html#cross-encoder)
- [link2](https://www.geeksforgeeks.org/nlp/sentence-similarity-using-bert-transformer/)
- [Sentence Transformer documentation](https://sbert.net/docs/quickstart.html)

#### Bi-encoder

In [27]:
from sentence_transformers import SentenceTransformer

# 1. Load a pretrained Sentence Transformer model
model = SentenceTransformer("all-MiniLM-L6-v2") # this model has 256 as seq length

# The sentences to encode
sentences = [
    ref_answ,
    answer,
]

# 2. Calculate embeddings by calling model.encode()
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 384]

# 3. Calculate the embedding similarities
similarities = model.similarity(embeddings, embeddings)
print(similarities)

(2, 384)
tensor([[1.0000, 0.8500],
        [0.8500, 1.0000]], device='cuda:0')


In [28]:
model

SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)

#### Cross-encoder

In [18]:
from sentence_transformers.cross_encoder import CrossEncoder

# 1. Load a pretrained CrossEncoder model
model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L6-v2")

# We want to compute the similarity between the query sentence...
query = ref_answ

# ... and all sentences in the corpus
corpus = [ref_answ, answer, ]

# 2. We rank all sentences in the corpus for the query
ranks = model.rank(query, corpus)

# Print the scores
print("Query: ", query)
for rank in ranks:
    print(f"{rank['score']:.2f}\tcorpus_id={rank['corpus_id']}") #

Query:  On July 7, 2025, at 2:15 PM, during operations on Capping Line A in the Sterile Facility, John Rivera adjusted the torque settings outside the validated range for Batch ZYX234 due to a misinterpretation of the setup sheet. Naomi Ellis, the shift lead, did not verify the torque setting. A 100% manual reinspection was initiated, Batch ZYX234 placed on hold, and the SOP was updated for clarity.
8.93	corpus_id=0
7.32	corpus_id=1


#### BERT score
References:
- [evaluate github](https://github.com/huggingface/evaluate)
- [HF bert score](https://huggingface.co/spaces/evaluate-metric/bertscore)

In [19]:
!pip install evaluate



In [20]:
from evaluate import load
bertscore = load("bertscore")
predictions = [answer]
references = [ref_answ]
results = bertscore.compute(predictions=predictions, references=references, model_type="distilbert-base-uncased")
print(results)

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

{'precision': [0.9033188819885254], 'recall': [0.9413809776306152], 'f1': [0.92195725440979], 'hashcode': 'distilbert-base-uncased_L5_no-idf_version=0.3.12(hug_trans=4.53.2)'}


# TODO: metrics class and test bench