In [1]:
import sys
from pathlib import Path
# Add the sibling "../src" directory to the import path; this must run before the
# `extraction` import below (presumably that is where the package lives — confirm).
sys.path.append(str(Path("..") / "src"))

# Extract the text from the PDF.
from extraction import extract_sangram
# NOTE(review): this input path is relative to the current working directory, while the
# embedding cell reads its input from "../outputs/..." — confirm how extract_pdf resolves it.
extract_sangram.extract_pdf("data/raw_pdfs/sample.pdf")

Files written to: /home/ubuntu/CITS5553/Project/outputs/processed_text
Preview (first 8 sentences):
Understanding the Significance of Layer Bonding in Melt Christopher D. Lamb, Brooke Maitland, Matt S. Hepburn, Tim R. Dargaville, Brendan F. Kennedy, Paul D. Dalton, Adrian Keating, and Elena M. De-Juan-Pardo* Melt electrowriting (MEW) is a high-resolution additive manufacturing technology capable of depositing micrometric fibers onto a moving collector to form 3D scaffolds of controlled mechanical properties.
While the critical role of layer bonding to achieve mechanical integrity in fused deposition modeling has been widely reported, it remains largely unknown in MEW, in part due to a lack of methods to assess it.
Here, a systematic framework is developed to unravel the significance of layer bonding in MEW scaffolds and its ultimate effect on their mechanical properties.
Results show that printing parameters, scaffold design, and print path have a strong impact on layer bonding strengt

In [2]:
# Embedding-model check (candidates: all-MiniLM-L6-v2 / intfloat/e5-base-v2).
# This cell uses intfloat/e5-base-v2.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("intfloat/e5-base-v2")
input_path = Path("../outputs/processed_text/sample.sentences.txt")

# One sentence per line: strip whitespace, drop blank lines, and tag each sentence
# with the "passage: " prefix (the query cell uses the matching "query: " prefix).
stripped_lines = (
    raw_line.strip()
    for raw_line in input_path.read_text(encoding="utf-8").splitlines()
)
sentences = [f"passage: {text}" for text in stripped_lines if text]

# Encode all sentences in one call and keep the result as a tensor.
embeddings = model.encode(sentences, convert_to_tensor=True)
print("Embeddings shape:", embeddings.shape)

  from .autonotebook import tqdm as notebook_tqdm


Embeddings shape: torch.Size([354, 768])


In [3]:
from sentence_transformers.util import cos_sim
import torch

query = "query: Analysis (CMCA) at the UWA."  # Dummy query

# Encode the query and move it to the device that holds the sentence embeddings.
device = embeddings.device
query_embedding = model.encode(query, convert_to_tensor=True).to(device)

# Add a leading batch dimension so the score matrix has one row per query.
query_embedding = query_embedding.unsqueeze(0)

# Cosine similarity between the query and every sentence embedding.
scores = cos_sim(query_embedding, embeddings)

# torch.topk raises when k exceeds the size of the selected dimension, so clamp k
# to the number of scored sentences (short documents may have fewer than 5).
top_k = torch.topk(scores, k=min(5, scores.shape[1]), dim=1)

# Convert to plain Python ints/floats before indexing the sentence list and formatting.
for idx, score in zip(top_k.indices[0].tolist(), top_k.values[0].tolist()):
    print(f"Match: {sentences[idx]} (Score={score:.4f})")

Match: passage: In Situ Layer Bonding Analysis Through Optical Coherence In the context of MEW and the complex interplay of various parameters, there is a growing need for adequate assessment (Score=0.8030)
Match: passage: The authors acknowledge the facilities, and the scientific and technical assistance  of Microscopy Australia at the Center for Microscopy, Characterization  & Analysis, The University of Western Australia, a facility funded by the University, State, and Commonwealth Governments. (Score=0.8024)
Match: passage: Proposed framework to assess layer bonding in melt electrowriting (MEW). (Score=0.7905)
Match: passage: 8] R. McMaster, C. Hoefner, A. Hrynevich, C. Blum, M. Wiesner, K. Wittmann, T. R. Dargaville, P. Bauer-Kreisel, J. Groll, P. D. Dalton, T. Blunk, Adv. (Score=0.7896)
Match: passage: For mechanical testing both uniaxial and biaxial data, statistical analysis was performed using a twoway  ANOVA with Tukey’s multiple comparisons test. (Score=0.7843)


In [4]:
# Inspect the raw torch.topk result for the dummy query:
# `values` holds the similarity scores and `indices` the matching positions in `sentences`.
print(top_k)

torch.return_types.topk(
values=tensor([[0.8030, 0.8024, 0.7905, 0.7896, 0.7843]], device='cuda:0'),
indices=tensor([[143, 301,  35, 270, 255]], device='cuda:0'))


In [5]:
# Map the top-k indices back to their sentences.
# k is clamped because torch.topk raises when k exceeds the number of scored sentences.
top_k = torch.topk(scores, k=min(5, scores.shape[1]), dim=1)
idxs = top_k.indices[0].tolist()
top_k_sentences = [sentences[i] for i in idxs]

In [6]:
# Display the retrieved sentences (each still carries its "passage: " prefix).
top_k_sentences

['passage: In Situ Layer Bonding Analysis Through Optical Coherence In the context of MEW and the complex interplay of various parameters, there is a growing need for adequate assessment',
 'passage: The authors acknowledge the facilities, and the scientific and technical assistance  of Microscopy Australia at the Center for Microscopy, Characterization  & Analysis, The University of Western Australia, a facility funded by the University, State, and Commonwealth Governments.',
 'passage: Proposed framework to assess layer bonding in melt electrowriting (MEW).',
 'passage: 8] R. McMaster, C. Hoefner, A. Hrynevich, C. Blum, M. Wiesner, K. Wittmann, T. R. Dargaville, P. Bauer-Kreisel, J. Groll, P. D. Dalton, T. Blunk, Adv.',
 'passage: For mechanical testing both uniaxial and biaxial data, statistical analysis was performed using a twoway  ANOVA with Tukey’s multiple comparisons test.']

In [None]:
import os

from openai import OpenAI

# Read the key from the OPENAI_API_KEY environment variable instead of embedding a
# literal in the notebook, so the secret is never saved or committed with the file.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

#Function to build the LLM prompt from top k sentences - This prompt has to be designed carefully
def build_prompt(top_k):
    """Build the acknowledgement-verification prompt from retrieved sentences.

    Args:
        top_k: Iterable of sentence strings to show to the LLM. A leading
            "passage: " embedding prefix, if present, is removed from each one.
            (The parameter name is kept for caller compatibility.)

    Returns:
        The prompt text, with one "- sentence" bullet per input sentence.
    """
    prefix = "passage: "
    # Use the argument rather than a notebook global, and strip only the *leading*
    # prefix so that sentences containing "passage: " mid-text are left intact.
    cleaned = [s[len(prefix):] if s.startswith(prefix) else s for s in top_k]
    bullet_sentences = '\n'.join(f"- {s}" for s in cleaned)
    return f"""
You are a research auditor verifying whether a document includes an acknowledgement of support.

Below are several sentences extracted from the document:
{bullet_sentences}

Task:
Based on the above sentences, determine whether the document contains an acknowledgement of support, funding, or institutional contribution (e.g. from CMCA, UWA, funding agencies, labs, or facilities).

Respond in **this exact format**:

Answer: [Yes or No]

Reason: [One or two sentence justification]
"""

In [None]:
import os

import openai

# Read the key from the environment; never store a literal key in the notebook.
openai.api_key = os.environ.get("OPENAI_API_KEY")

#Function to create prompt using top k sentences
def verify_acknowledgement(top_k_sentences):
    """Ask GPT-4 whether the retrieved sentences contain an acknowledgement.

    Note: a later cell defines a function with the same name (using
    gpt-3.5-turbo); whichever cell runs last is the definition in effect.

    Args:
        top_k_sentences: Sentences passed to ``build_prompt`` to form the user message.

    Returns:
        The text content of the first choice in the chat completion.
    """
    prompt = build_prompt(top_k_sentences)

    # openai>=1.0 removed `openai.ChatCompletion`; the module-level client exposes
    # the same call as `openai.chat.completions.create` and returns an object
    # (attribute access) rather than a dict.
    response = openai.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a research auditor."},
            {"role": "user", "content": prompt}
        ],
        temperature=0.0
    )

    return response.choices[0].message.content


In [9]:
#Function to call LLM
def verify_acknowledgement(top_k_sentences):
    """Send the acknowledgement prompt to gpt-3.5-turbo and return its reply.

    This redefines the same-named function from the previous cell.

    Args:
        top_k_sentences: Sentences passed to ``build_prompt`` to form the user message.

    Returns:
        The text content of the first choice in the chat completion.
    """
    # System role message followed by the generated user prompt.
    messages = [
        {"role": "system", "content": "You are a research auditor."},
        {"role": "user", "content": build_prompt(top_k_sentences)},
    ]

    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
        temperature=0.0,
    )

    first_choice = completion.choices[0]
    return first_choice.message.content

In [10]:
# Query the LLM with the retrieved sentences, then print a header followed by its reply.
result = verify_acknowledgement(top_k_sentences)
print("\nLLM Response:\n", result, sep="\n")


LLM Response:

Answer: Yes

Reason: The document includes an acknowledgement of support from Microscopy Australia at the Center for Microscopy, Characterization & Analysis, The University of Western Australia, which indicates institutional contribution and support.


In [11]:
# Re-display the sentences that were sent to the LLM, for comparison with its answer.
top_k_sentences

['passage: In Situ Layer Bonding Analysis Through Optical Coherence In the context of MEW and the complex interplay of various parameters, there is a growing need for adequate assessment',
 'passage: The authors acknowledge the facilities, and the scientific and technical assistance  of Microscopy Australia at the Center for Microscopy, Characterization  & Analysis, The University of Western Australia, a facility funded by the University, State, and Commonwealth Governments.',
 'passage: Proposed framework to assess layer bonding in melt electrowriting (MEW).',
 'passage: 8] R. McMaster, C. Hoefner, A. Hrynevich, C. Blum, M. Wiesner, K. Wittmann, T. R. Dargaville, P. Bauer-Kreisel, J. Groll, P. D. Dalton, T. Blunk, Adv.',
 'passage: For mechanical testing both uniaxial and biaxial data, statistical analysis was performed using a twoway  ANOVA with Tukey’s multiple comparisons test.']