In [1]:
!pip install textstat
!pip install groq

Collecting textstat
  Downloading textstat-0.7.4-py3-none-any.whl.metadata (14 kB)
Collecting pyphen (from textstat)
  Downloading pyphen-0.17.0-py3-none-any.whl.metadata (3.2 kB)
Downloading textstat-0.7.4-py3-none-any.whl (105 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m105.1/105.1 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyphen-0.17.0-py3-none-any.whl (2.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m51.8 MB/s[0m eta [36m0:00:00[0m:00:01[0m
[?25hInstalling collected packages: pyphen, textstat
Successfully installed pyphen-0.17.0 textstat-0.7.4
Collecting groq
  Downloading groq-0.11.0-py3-none-any.whl.metadata (13 kB)
Downloading groq-0.11.0-py3-none-any.whl (106 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m106.5/106.5 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: groq
Successfully installed groq-0.11.0


In [2]:
import json
import math
from datetime import datetime
from getpass import getpass

import spacy
import textstat

def retrieve_json_info(json_data, top_five_regions):
    """Flatten the first record of a JSON report into one prompt-ready string.

    Args:
        json_data: Sequence whose first element is a dict providing the keys
            ``Models_used``, ``Data`` (with ``Scan_type``), ``Metrics`` and
            ``Visual_heatmaps``. ``Visual_heatmaps`` is a newline-separated
            string holding one JSON object per line; each object must provide
            ``Region``, ``%_Heatmap`` and ``%_Region``.
        top_five_regions: When truthy, only the five regions with the largest
            ``%_Heatmap`` value are listed, in descending order. Otherwise
            every region is listed in the order it appears in the report.

    Returns:
        str: The models, scan type and metrics followed by one
        ``"<Region>: <%_Heatmap>%, <%_Region>% -\\t "`` fragment per region.

    Raises:
        IndexError, KeyError: If the report lacks an expected element or key.
        json.JSONDecodeError: If a non-blank heatmap line is not valid JSON.
    """
    report = json_data[0]
    model_used = report['Models_used']
    scan_type = report['Data']['Scan_type']
    metrics = report['Metrics']

    # Ignore blank AND whitespace-only lines (e.g. a trailing "\r" left over from
    # Windows line endings); json.loads would otherwise raise on them.
    heatmaps = [
        json.loads(line)
        for line in report['Visual_heatmaps'].split('\n')
        if line.strip()
    ]

    if top_five_regions:
        heatmaps = sorted(heatmaps, key=lambda entry: entry['%_Heatmap'], reverse=True)[:5]

    parts = [
        " Models used:",
        f" Classification: {model_used['Classification_model']} \t",
        f" Visual explainability: {model_used['Visual_explainability_model']} \t",
        f" Scan type: {scan_type} \t",
        " Metrics:\n",
        f" ROC AUC: {metrics['ROC_AUC']}",
        f" Accuracy: {metrics['accuracy']}",
        f" Precision: {metrics['precision']}",
        f" Recall: {metrics['recall']}",
        f" F1 Score: {metrics['f1_score']} \t",
    ]
    # A single formatting path serves both the "top five" and the "all regions" case.
    parts.extend(
        f"{entry['Region']}: {entry['%_Heatmap']:.2f}%, {entry['%_Region']:.2f}% -\t "
        for entry in heatmaps
    )
    return "".join(parts)

def do_inference(prompt, model):
    """Generate a completion for ``prompt`` with ``model`` and time the generation.

    Relies on the module-level names ``tokenizer`` and ``device``, which must be
    defined before this function is called (they are not defined in this cell).

    Args:
        prompt: Text passed to ``tokenizer``.
        model: Object exposing ``config.pad_token_id``, ``config.eos_token_id``
            and ``generate(...)`` -- presumably a Hugging Face causal LM;
            confirm against the caller.

    Returns:
        tuple: ``(decoded_text, seconds)`` where ``decoded_text`` is
        ``tokenizer.decode`` applied to the first generated sequence and
        ``seconds`` is the elapsed wall-clock time as a float. The timer stops
        before decoding, so decoding is not included in the measurement.
    """
    started_at = datetime.now()

    # Use the end-of-sequence token id as the padding token id for generation.
    model.config.pad_token_id = model.config.eos_token_id

    encoded = tokenizer(prompt, return_tensors="pt")
    # NOTE(review): the return value of .to() is discarded, so this relies on the
    # tokenizer output being moved in place -- confirm for the tokenizer in use.
    encoded.to(device)

    generated = model.generate(
        encoded.input_ids,
        attention_mask=encoded.attention_mask,
        max_length=2048,
    )

    elapsed_seconds = (datetime.now() - started_at).total_seconds()
    return tokenizer.decode(generated[0]), elapsed_seconds

def textual_metrics(input, language):
    """Print lexical-diversity and readability metrics for a text.

    The text is lower-cased and tokenised with the spaCy ``en_core_web_sm``
    pipeline (for every ``language`` value) to compute the type-token ratio;
    the readability scores come from ``textstat`` on the unmodified text.

    Args:
        input: Text to analyse.
        language: ``'EN'`` computes Flesch reading ease, Gunning fog,
            Dale-Chall, ARI and Coleman-Liau. ``'IT'`` computes the Gulpease
            index as the readability score; the English-only scores are
            reported as ``0``.

    Returns:
        None. The metrics are printed to stdout.

    Raises:
        ValueError: If ``language`` is neither ``'EN'`` nor ``'IT'``.
    """
    # Validate first so that an unsupported language fails fast with a clear
    # message instead of an UnboundLocalError when the results are printed.
    if language not in ('EN', 'IT'):
        raise ValueError(f"Unsupported language {language!r}; expected 'EN' or 'IT'")

    nlp = spacy.load("en_core_web_sm")
    doc = nlp(input.lower())

    tokens = [token.text for token in doc if not token.is_punct]
    unique_tokens = set(tokens)

    # Type-token ratio; a text without any non-punctuation token yields 0.0
    # rather than raising ZeroDivisionError.
    ttr = len(unique_tokens) / len(tokens) if tokens else 0.0

    if language == 'EN':
        read = textstat.flesch_reading_ease(input)  # Flesch reading ease test, only for English
        fog_index = textstat.gunning_fog(input)
        dale_chall = textstat.dale_chall_readability_score(input)
        ari_score = textstat.automated_readability_index(input)
        colemanliau = textstat.coleman_liau_index(input)
    else:  # 'IT'
        read = textstat.gulpease_index(input)
        # The remaining scores are only computed for English text; report 0 for all
        # of them so that every name used by the print below is defined.
        fog_index = dale_chall = ari_score = colemanliau = 0

    print(f'TTR: {ttr} \n Readability:: {read}\n FOG index: {fog_index}\n Dale-Chall: {dale_chall}\n ARI score: {ari_score} \n Coleman-Liau: {colemanliau}')


In [4]:
import os
import json
from groq import Groq

# SECURITY: never hardcode the Groq API key in the notebook. The key that used to be
# written on this line must be treated as leaked and revoked. Use the value from the
# environment when present; otherwise ask for it interactively without echoing it.
if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass("Groq API key: ")

# Directory that holds the per-patient JSON reports.
REPORTS_DIR = "/kaggle/input/json-reports-2"


def _load_report_summary(filename, top_five_regions=False):
    """Read one JSON report from REPORTS_DIR and return its flattened text summary."""
    with open(os.path.join(REPORTS_DIR, filename), "r") as f:
        return retrieve_json_info(json.load(f), top_five_regions=top_five_regions)


# One summary string per report. Each name is bound exactly once (to the final
# string), so re-running this part of the cell always gives the same result.
json_cn = _load_report_summary("json_report_t1w_CN (202).json")
json_cmci = _load_report_summary("json_report_t1w_cMCI (156).json")
json_ncmci = _load_report_summary("json_report_t1w_ncMCI (205).json")
json_ad = _load_report_summary("json_report_t1w_AD (60).json")

# Single-report prompt: embeds the cMCI summary (json_cmci) and asks for a
# region-by-region report. It is defined here but not sent by the request at the
# end of this cell, which uses prompt2_en_cmci_ncmci.
prompt1_en = f"""The provided data are derived from a brain MRI study {json_cmci}. Each row describes an area of the Julich-Brain Atlas, the percentages of the heatmap within each region and the second percentage indicating the impact on the region. Please generate a report in which you describe the functioning of each region, according to the Julich-Brain Atlas, noting the percentages of the heatmap within each region and the impact on the region. Provide explanations in a clear manner that can be easily understood by medical professionals. Discuss the potential reasons why the model might have focused on this region, given its known roles and functions in the brain. Please don't insert the name of the classification and the visual explainability models used. Explain the clinical implications of the model's focus on this region, such as how it might encourage further investigation into early signs of the mild cognitive condition of the Alzheimer's Disease."""

# Comparison prompt: CN summary first, ncMCI summary second. A backslash at the end
# of a physical line inside the literal joins it with the next line, so no newline
# is emitted at that point.
# NOTE(review): the text asks about the "converter MCI case" although this prompt
# compares CN with ncMCI -- confirm that this wording is intended.
prompt2_en_cn_ncmci = f"""The provided data are derived from a brain MRI study {json_cn}, {json_ncmci}. The first json data is about a patient classified by the model as cognitively normal (CN), the second is a non-converter-MCI case. (ncMCI). \
Each section of the visual heatmaps describes an area of the Julich-Brain Atlas, the percentages of the heatmap within each region and the second percentage indicating the impact on the region. \
Compare the regions highlighted in the two patients, analyze the regions in common and regions that don’t and the level of focus of the model, explaining the known roles of brain regions in Alzheimer's disease and in converter MCI case. Explain the clinical implications of the model's focus on this region, such as how it might encourage further investigation into early signs of the mild cognitive condition of the Alzheimer's Disease. Please don't insert into the report the name of classification and visual explainability models used.
Please remember that the heatmap does not describe areas that are affected by the disease, but the areas that were the most relevant for the model to reach its classification."""

# Comparison prompt: CN summary first, AD summary second.
prompt2_en_cn_ad = f"""The provided data are derived from a brain MRI study {json_cn}, {json_ad}. The first json data is about a patient classified by the model as cognitively normal (CN), the second is an Alzheimer’s disease case. (AD). \
Each section of the visual heatmaps describes an area of the Julich-Brain Atlas, the percentages of the heatmap within each region and the second percentage indicating the impact on the region. \
Compare the regions highlighted in the two patients, analyze the regions in common and regions that don’t and the level of focus of the model, explaining the known roles of brain regions in Alzheimer's disease. Explain the clinical implications of the model's focus on this region, such as how it might encourage further investigation into early signs of the mild cognitive condition of the Alzheimer's Disease. Please don't insert into the report the name of classification and visual explainability models used.
Please remember that the heatmap does not describe areas that are affected by the disease, but the areas that were the most relevant for the model to reach its classification."""

# Comparison prompt: cMCI summary first, ncMCI summary second. This is the prompt
# sent to the chat model at the end of this cell.
prompt2_en_cmci_ncmci = f"""The provided data are derived from a brain MRI study {json_cmci}, {json_ncmci}. The first json data is about a patient classified by the model as converter MCI (cMCI), the second is non-converter case (ncMCI). \
Each section of the visual heatmaps describes an area of the Julich-Brain Atlas, the percentages of the heatmap within each region and the second percentage indicating the impact on the region. \
Compare the regions highlighted in the two patients, analyze the regions in common and regions that don’t and the level of focus of the model, explaining the known roles of brain regions in Alzheimer's disease. Explain the clinical implications of the model's focus on this region, such as how it might encourage further investigation into early signs of the mild cognitive condition of the Alzheimer's Disease. Please don't insert into the report the name of classification and visual explainability models used.
Please remember that the heatmap does not describe areas that are affected by the disease, but the areas that were the most relevant for the model to reach its classification."""



# Model identifiers that can be selected by index for the Groq request below.
models = [
    "llama3-8b-8192",
    "mixtral-8x7b-32768",
    "gemma2-9b-it",
    "gemma-7b-it",
    "llama-3.1-70b-versatile",
]

# The client is authenticated with the key stored in the GROQ_API_KEY variable.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Send the cMCI-vs-ncMCI comparison prompt as a single user message to models[2].
chat_completion = client.chat.completions.create(
    model=models[2],
    messages=[{"role": "user", "content": prompt2_en_cmci_ncmci}],
)

# Show the generated report, then print its lexical and readability metrics.
output_text = chat_completion.choices[0].message.content
print(output_text)

textual_metrics(output_text, 'EN')

This data provides interesting insights into how a model used to differentiate between converter MCI (cMCI) and non-converter MCI (ncMCI) patients focuses on specific brain regions.  

**Commonalities and Differences:**

While the exact regions highlighted vary across the three patients, we notice some common trends.  For instance, areas like the  **medial geniculate body**, **Broca's area**, and **the  anterior intra-parietal sulcus** are frequently emphasized.  These regions play crucial roles in auditory processing, language, and attention, all of which are known to be affected in Alzheimer's disease (AD) progression.  

On the other hand, variations exist in the specific regions highlighted and their level of importance. This suggests that the model might be capturing subtle differences in the **pattern of brain involvement** between cMCI and ncMCI patients, even at an early stage. 

**Clinical Implications:**

The model's focus on these specific brain regions has significant clini