In [96]:
import os
import json
import csv
from bs4 import BeautifulSoup

# Directories and file paths
# The two report directories are only referenced by the commented-out
# extraction loop further down, which joins them with report filenames.
mriqc_failed_dir = "ds004636/derivatives/mriqc_failed"
mriqc_passed_dir = "ds004636/derivatives/mriqc_passed"

# CSV whose "Metric" column supplies the metric names to extract
csv_file = "quality_metrics/BOLD_quality_metrics.csv"
# Text files that the commented-out extraction loop reads one filename per line
mriqc_passed_txt = "metadata/mriqc_passed/mriqc_passed_fullname.txt"
mriqc_failed_txt = "metadata/mriqc_failed/mriqc_failed_fullname.txt"

# Task names to filter: parse_filename() searches filenames for these names
# and the task-metric plotting loop iterates over them.
TASKS = {"ANT", "CCTHot", "WATT3", "StopSignal", "TwoByTwo", "DPX", "DiscountFix", "MotorSelectiveStop", "Stroop", "SurveyMedley"}

In [97]:
def parse_filename(filename, tasks=None):
    """Extract the subject ID and the task name from a report filename.

    The subject is the first underscore-separated token with ``sub-`` removed.
    The task is resolved in two steps so the answer does not depend on set
    iteration order:

    1. if a ``task-<label>`` token is present and ``<label>`` is a known task,
       that label is returned (exact match);
    2. otherwise the known task names are searched as substrings of the whole
       filename, longest name first, so a name that contains another known
       name always wins over the shorter one.

    Args:
        filename (str): Filename taken from mriqc_failed.txt or mriqc_passed.txt.
        tasks (iterable of str, optional): Known task names. Defaults to the
            module-level ``TASKS``.

    Returns:
        tuple: ``(subject, task)``; ``task`` is ``None`` when no known task
        name is found in ``filename``.
    """
    known_tasks = TASKS if tasks is None else tasks

    parts = filename.split("_")
    subject = parts[0].replace("sub-", "")

    # Exact match on the "task-<label>" token; the split drops a trailing
    # extension when the token is the last one (e.g. "task-ANT.html").
    prefix = "task-"
    for part in parts:
        if part.startswith(prefix):
            label = part[len(prefix):].split(".", 1)[0]
            if label in known_tasks:
                return subject, label

    # Fallback: substring search in a deterministic, longest-first order.
    ordered = sorted(known_tasks, key=lambda name: (-len(name), name))
    task = next((name for name in ordered if name in filename), None)
    return subject, task


def extract_metrics(html_path, metrics):
    """Scrape the requested metric values from one HTML report.

    The report is parsed with BeautifulSoup. The function locates the element
    with id "other", then the next table with id "iqms-table", and reads each
    table row: two-cell rows are ``name, value``; three-cell rows are
    ``name-part-1, name-part-2, value`` with the two name parts joined by "_".

    Args:
        html_path (str): Path of the HTML report to read (opened as UTF-8).
        metrics (container of str): Metric names to keep; rows whose name is
            not in this container are ignored.

    Returns:
        dict: Metric name -> value. Values are floats when the cell text parses
        as a float, otherwise the stripped cell text. Empty when the "other"
        element or the "iqms-table" table is missing.
    """
    results = {}

    with open(html_path, "r", encoding="utf-8") as handle:
        document = BeautifulSoup(handle, "html.parser")

    # The metric table is looked up relative to the element with id "other"
    anchor = document.find(id="other")
    if not anchor:
        print(f" other table not found for {html_path}")
        return results

    # Metric key-value pairs are listed in the table with id "iqms-table"
    iqm_table = anchor.find_next("table", {"id": "iqms-table"})
    if not iqm_table:
        print(f"iqms table not found for {html_path}")
        return results

    for table_row in iqm_table.find_all("tr"):
        texts = [cell.text.strip() for cell in table_row.find_all("td")]

        if len(texts) == 2:
            name, raw_value = texts
        elif len(texts) == 3:
            # Metric name is split over two cells; join the parts with "_"
            name, raw_value = f"{texts[0]}_{texts[1]}", texts[2]
        else:
            continue

        if name not in metrics:
            continue

        # Keep the raw text when the cell is not numeric
        try:
            results[name] = float(raw_value)
        except ValueError:
            results[name] = raw_value

    return results

In [98]:

# Organize data into JSON
# quality_data = {"mriqc_failed": {}, "mriqc_passed": {}}

# Read metric names from CSV (BOLD_quality_metrics)
# newline="" leaves newline handling to the csv module, as its documentation asks.
with open(csv_file, "r", newline="") as f:
    reader = csv.DictReader(f)
    # Fail early with a readable message if the expected header is absent,
    # instead of a bare KeyError on the first row (or a silently empty set).
    if not reader.fieldnames or "Metric" not in reader.fieldnames:
        raise KeyError(f"'Metric' column not found in {csv_file}; columns are {reader.fieldnames}")
    metrics = {row["Metric"] for row in reader}
        
# # Process both mriqc_failed_txt and mriqc_passed_txt files.
# for category, txt_file in [("mriqc_failed", mriqc_failed_txt), ("mriqc_passed", mriqc_passed_txt)]:
#     with open(txt_file, "r") as f:

#         # filenames is a list of all full html file names in failed and passed combined
#         filenames = [line.strip() for line in f]

#     # sets mriqc_dir as the current directory
#     if category == "mriqc_failed": 
#         mriqc_dir = mriqc_failed_dir
#     else:
#         mriqc_dir = mriqc_passed_dir

#     # for each html in mriqc_passed or mriqc_failed, get the metrics using extract_metrics function 
#     for filename in filenames:
#         html_path = os.path.join(mriqc_dir, filename)
#         if os.path.isfile(html_path):
#             subject, task = parse_filename(filename)
#             if task:
#                 if task not in quality_data[category]:
#                     quality_data[category][task] = {}
#                 if subject not in quality_data[category][task]:
#                     quality_data[category][task][subject] = {}
#                 quality_data[category][task][subject] = extract_metrics(html_path, metrics)
#         else:
#             print(f"not a path name {filename}")
    
# # Save quality_data to json file.
# with open("quality_metrics/quality_metrics_all.json", "w") as f:
#     json.dump(quality_data, f, indent=4)

# print("JSON file created successfully!")

In [99]:
# def reformat_json(input_json, output_json):
#     """Reformats jason with a following hierarchy: 
#     from: category/task/subject/metric-value
#     to: category/task/metric/subject-value
#     creates output_json

#     Args:
#         input_json (_type_): _description_
#         output_json (_type_): _description_
#     """
#     with open(input_json, "r") as f:
#         data = json.load(f)
    
#     reformatted = {"mriqc_failed": {}, "mriqc_passed": {}}
    
#     for category in ["mriqc_failed", "mriqc_passed"]:
#         for task, subjects in data.get(category, {}).items():
#             if task not in reformatted[category]:
#                 reformatted[category][task] = {}
            
#             for subject, metrics in subjects.items():
#                 for metric, value in metrics.items():
#                     if metric not in reformatted[category][task]:
#                         reformatted[category][task][metric] = {}
                    
#                     reformatted[category][task][metric][subject] = value
    
#     with open(output_json, "w") as f:
#         json.dump(reformatted, f, indent=4)

In [100]:
# File paths
# input_json / output_json are the source and destination of the (currently
# commented-out) reformat_json call below.
# NOTE: output_json is reassigned by later cells, so any cell that reads it
# depends on which assignment ran last.
input_json = "quality_metrics/quality_metrics_all.json"
output_json = "quality_metrics/quality_metrics_all_reformatted.json"
# Same path as csv_file in the first cell
metrics_csv = "quality_metrics/BOLD_quality_metrics.csv"

# Run functions
# reformat_json(input_json, output_json)

In [101]:
# saves metrics for plot functions
# thresholds maps each metric name to its (low, high, median) reference values
thresholds = dict()

# opens metrics csv (listing names and thresholds)
# newline="" leaves newline handling to the csv module, as its documentation asks.
with open(metrics_csv, "r", newline="") as f:
    reader = csv.DictReader(f)  # each row becomes a dict keyed by the header
    for row in reader:
        metric = row["Metric"]

        # the CSV cells are text; the plot functions need numbers
        low, high, median = float(row["Low"]), float(row["High"]), float(row["Median"])
        thresholds[metric] = (low, high, median)

In [102]:
# Worked example of the rescaling used by the plot functions, for one metric.
metric = "gsr_y"
values = [
    0.04774569720029831,
    0.04930886626243591,
    0.11757352203130722,
    0.03400810435414314,
    0.030550967901945114,
    0.028645630925893784,
    0.030653437599539757,
]

bounds = thresholds.get(metric)
if bounds is not None:
    low, high, median = bounds
    std_dev = (high - low) / 2  # half the Low-High range, used as the scale
    mid = (high + low) / 2      # centre of the Low-High range
    values = [(v - mid) / std_dev for v in values]

print(values)



[-0.6090883276564011, -0.5924588695485542, 0.13376087267348097, -0.7552329324027326, -0.7920109797665414, -0.8122805220649598, -0.790920876600641]


In [103]:
import pandas as pd
import matplotlib.pyplot as plt

# plots task-metric 
def plot_task_metric(task, metric, data, thresholds, save_dir):
    """Scatter-plot one metric across the subjects of one task and save a PNG.

    Values from the "mriqc_failed" group are plotted first, then those from
    "mriqc_passed", with a dashed vertical line at the boundary. When
    ``metric`` has an entry in ``thresholds`` the values are rescaled so that
    Low maps to -1 and High maps to +1. Points inside [-1, 1] are green, the
    rest red. If there is nothing to plot, no file is written.

    Args:
        task (str): Task name; key under each category in ``data``.
        metric (str): Metric name; key under the task.
        data (dict): Nested mapping ``category -> task -> metric -> subject -> value``.
        thresholds (dict): Metric name -> ``(low, high, median)``.
        save_dir (str): Output directory; created if missing.

    Returns:
        None. Writes ``<save_dir>/<task>-<metric>.png`` when data is present.
    """
    os.makedirs(save_dir, exist_ok=True)

    subjects = []
    values = []
    categories = []

    # Collect failed subjects first so they end up on the left of the plot
    for category in ("mriqc_failed", "mriqc_passed"):
        per_subject = data.get(category, {}).get(task, {}).get(metric, {})
        for subject, value in per_subject.items():
            subjects.append(subject)
            values.append(value)
            categories.append(category)

    # Nothing to draw: the annotation code below indexes values[-1]
    if not values:
        print(f"No values for {task} - {metric}; skipping plot")
        return

    # Rescale with Low/High from thresholds (the median is not used here)
    if metric in thresholds:
        low, high, _median = thresholds[metric]
        half_range = (high - low) / 2
        mid = (high + low) / 2
        values = [(v - mid) / half_range for v in values]

    # Green inside the expected range, red outside
    colors = ["green" if -1 <= v <= 1 else "red" for v in values]

    failed_count = categories.count("mriqc_failed")
    n_points = len(subjects)

    # Widen the figure with the number of subjects, but never below 6 inches
    fig_width = max(n_points * 0.5, 6)
    fig, ax = plt.subplots(figsize=(fig_width, 6))

    ax.scatter(range(n_points), values, c=colors, s=50)

    # Vertical separator between the failed and passed groups
    ax.axvline(x=failed_count - 0.5, color="black", linestyle="--")
    if failed_count >= 3:
        ax.text(-0.5, 1.8, "mriqc_failed", fontsize=8)
        ax.text(failed_count - 0.3, 1.8, "mriqc_passed", fontsize=8)
    elif failed_count > 0:
        # Few failed subjects: drop the second label so the two do not overlap
        ax.text(-0.5, 1.8, "mriqc_failed", fontsize=8)
        ax.text(failed_count - 0.3, 1.55, "mriqc_passed", fontsize=8)
    else:
        ax.text(failed_count - 0.3, 1.8, "mriqc_passed", fontsize=8)

    ax.set_yticks([-2, -1, 0, 1, 2])

    ax.set_title(f"{task} - {metric}", fontsize=20, pad=20, y=1)
    ax.set_ylabel("Z-Normalized Value", fontsize=12)
    ax.set_xlabel("Subjects", fontsize=12)

    # Shaded band for the acceptable range (-1 to 1)
    ax.axhspan(-1, 1, color="gray", alpha=0.2)

    # Put the "within range" note on the opposite side of the last point
    label_x = n_points - 0.5
    within_y = -0.8 if values[-1] > 0 else 0.7
    ax.text(label_x, within_y, "within\nrange", ha="right", fontsize=8)
    ax.text(label_x, -1.2, "low", ha="right", fontsize=8)
    ax.text(label_x, 1.1, "high", ha="right", fontsize=8)

    # One tick per subject, labelled with the subject ID
    ax.set_xticks(range(n_points))
    ax.set_xticklabels(subjects, rotation=45, ha="right")
    ax.set_ylim(-2, 2)

    # Empty scatters exist only to create the legend entries
    ax.scatter([], [], color="green", label="In range")
    ax.scatter([], [], color="red", label="Outside range")
    ax.legend(loc="upper right")

    fig.subplots_adjust(left=0.2, right=0.8, top=0.8, bottom=0.2)
    fig.savefig(os.path.join(save_dir, f"{task}-{metric}.png"), dpi=300)
    plt.close(fig)

In [104]:
# Smoke test: draw a single task-metric plot before generating the full set.
with open(output_json, "r") as json_file:
    data = json.load(json_file)  # json.load reads directly from the file handle
plot_task_metric(
    task="ANT",
    metric="dvars_vstd",
    data=data,
    thresholds=thresholds,
    save_dir="quality_metrics",
)

In [105]:
# Generate one plot for every task x metric combination.
with open(output_json, "r") as json_file:
    data = json.load(json_file)

task_metric_dir = "quality_metrics/plots/task-metric"
for task in TASKS:
    # thresholds is keyed by metric name, so its keys are the metrics to plot
    for metric in list(thresholds):
        plot_task_metric(task, metric, data, thresholds, save_dir=task_metric_dir)


In [106]:
# Combine the task-metric plots into one canvas per task: all plots of the same task go on the same canvas.
import os
import glob
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from collections import defaultdict

# Directory containing the individual task-metric plots
plots_dir = "quality_metrics/plots/task-metric"

# Group the images by task; file names are "<task>-<metric>.png"
task_plots = defaultdict(list)

# Top-level PNGs only (the pattern is not recursive, so the combined output
# sub-directory is not picked up)
png_files = glob.glob(os.path.join(plots_dir, "*.png"))

for file in png_files:
    filename = os.path.basename(file).removesuffix(".png")
    task, sep, metric = filename.partition("-")
    if not sep:
        continue  # not a "<task>-<metric>" file name
    task_plots[task].append((file, metric))

# One combined canvas per task
output_dir = "quality_metrics/plots/task-metric/combined_task-metric_plots"
os.makedirs(output_dir, exist_ok=True)

cols = 3
for task, plots in task_plots.items():
    num_plots = len(plots)
    # Keep the 2x3 layout for up to six plots and add rows beyond that, so no
    # plot is silently dropped.
    rows = max(2, -(-num_plots // cols))

    # squeeze=False always yields a 2-D array, so flatten() is valid for any
    # number of plots (including a single one).
    fig, axes = plt.subplots(rows, cols, figsize=(cols * 4, rows * 4), squeeze=False)
    fig.suptitle(f"{task} across Metrics", fontsize=16, y=0.95)
    axes = axes.flatten()

    # Panels are ordered by metric name
    for ax, (file, metric) in zip(axes, sorted(plots, key=lambda x: x[1])):
        img = mpimg.imread(file)
        ax.imshow(img)
        ax.axis("off")

    # Hide the unused panels
    for ax in axes[num_plots:]:
        ax.axis("off")

    fig.tight_layout(rect=[0, 0, 1, 1])
    output_path = os.path.join(output_dir, f"{task}_across_metrics.png")
    fig.savefig(output_path, dpi=300)
    plt.close(fig)

print(f"Combined plots saved in {output_dir}")

Combined plots saved in quality_metrics/plots/task-metric/combined_task-metric_plots


In [107]:
# CREATE PDF FOR TASK-METRIC
from PIL import Image
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

# Directory containing the combined task-metric canvases
combined_plots_dir = "quality_metrics/plots/task-metric/combined_task-metric_plots"
output_pdf = os.path.join("plots_important", "task-metric.pdf")
# PdfPages does not create missing directories
os.makedirs(os.path.dirname(output_pdf), exist_ok=True)

# All PNG images in the directory, in name order (one PDF page each)
image_files = sorted(glob.glob(os.path.join(combined_plots_dir, "*.png")))

with PdfPages(output_pdf) as pdf:
    for img_file in image_files:
        # The context manager closes the image file once it has been drawn
        with Image.open(img_file) as img:
            # Figure size in inches is derived from the pixel size (pixels / 100)
            fig, ax = plt.subplots(figsize=(img.width / 100, img.height / 100), dpi=300)
            ax.imshow(img)
        ax.axis("off")
        pdf.savefig(fig, bbox_inches="tight", dpi=300)
        plt.close(fig)  # free the figure before the next page

print(f"High-quality PDF saved at: {output_pdf}")

High-quality PDF saved at: plots_important/task-metric.pdf


In [108]:
# plot subject task
import pandas as pd
import matplotlib.pyplot as plt

def plot_subject_task(category, task, subject, data, thresholds, save_dir):
    """Scatter-plot every metric of one subject/task run and save a PNG.

    Each metric value is rescaled with its own thresholds so that Low maps to
    -1 and High maps to +1. Points inside [-1, 1] are green, the rest red.
    Metrics without an entry in ``thresholds`` cannot be rescaled and are left
    out. If nothing remains to plot, no file is written.

    Args:
        category (str): Top-level key of ``data`` ("mriqc_failed" or "mriqc_passed").
        task (str): Task name; key under the category.
        subject (str): Subject ID; key under the task.
        data (dict): Nested mapping ``category -> task -> subject -> metric -> value``
            (the non-reformatted JSON).
        thresholds (dict): Metric name -> ``(low, high, median)``.
        save_dir (str): Output directory; created if missing.

    Returns:
        None. Writes ``<save_dir>/<category without "mriqc_">-<subject>-<task>.png``
        when data is present.
    """
    os.makedirs(save_dir, exist_ok=True)

    metrics = []
    values = []

    # Rescale each value while collecting it (one pass, no index lookups)
    raw_values = data.get(category, {}).get(task, {}).get(subject, {})
    for metric, value in raw_values.items():
        if metric not in thresholds:
            continue
        low, high, _median = thresholds[metric]
        half_range = (high - low) / 2
        mid = (high + low) / 2
        metrics.append(metric)
        values.append((value - mid) / half_range)

    # Nothing to draw: the annotation code below indexes values[-1]
    if not values:
        print(f"No values for {category} - {task} - {subject}; skipping plot")
        return

    # Green inside the expected range, red outside
    colors = ["green" if -1 <= v <= 1 else "red" for v in values]

    # removeprefix drops exactly the "mriqc_" prefix; str.strip would instead
    # strip any of those characters from both ends of the string.
    category_label = category.removeprefix("mriqc_")

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.scatter(range(len(metrics)), values, c=colors, s=50)

    ax.set_yticks([-2, -1, 0, 1, 2])

    ax.set_title(f"{subject} - {task} - {category_label}", fontsize=20, pad=20, y=1)
    ax.set_ylabel("Z-Normalized Value", fontsize=12)
    ax.set_xlabel("Metrics", fontsize=12)

    # Shaded band for the acceptable range (-1 to 1)
    ax.axhspan(-1, 1, color="gray", alpha=0.2)

    # Put the "within range" note on the opposite side of the last point
    label_x = len(metrics) - 1 + 0.1
    within_y = 0.7 if values[-1] < 0 else -0.8
    ax.text(label_x, within_y, "within\nrange", verticalalignment="center", ha="right", fontsize=8)
    ax.text(label_x, -1.2, "low", verticalalignment="center", ha="right", fontsize=8)
    ax.text(label_x, 1.1, "high", verticalalignment="center", ha="right", fontsize=8)

    # One tick per plotted metric (not a fixed count)
    ax.set_xticks(range(len(metrics)))
    ax.set_xticklabels(metrics)

    # Empty scatters exist only to create the legend entries
    ax.scatter([], [], color="green", label="In range")
    ax.scatter([], [], color="red", label="Outside range")
    ax.legend(loc="upper right")

    ax.set_ylim(-2, 2)
    fig.subplots_adjust(left=0.2, right=0.8, top=0.8, bottom=0.2)

    fig.savefig(os.path.join(save_dir, f"{category_label}-{subject}-{task}.png"), dpi=300)
    plt.close(fig)


In [109]:
# Smoke test: draw a single subject-task plot before generating the full set.
# output_json is re-pointed at the non-reformatted JSON for the subject-task plots.
output_json = "quality_metrics/quality_metrics_all.json"
with open(output_json, "r") as json_file:
    data = json.load(json_file)  # json.load reads directly from the file handle
plot_subject_task(
    "mriqc_failed",
    "CCTHot",
    "s607",
    data,
    thresholds,
    save_dir="quality_metrics/plots/subject-task",
)

In [110]:
# save all combinations of subject-task plots
with open(output_json, "r") as f:
    data = json.load(f)

# Walk category -> task -> subject. Each level gets its own name so the loop
# variables do not shadow one another, and the notebook-level `metrics` set
# read from the CSV is not overwritten.
for category, tasks_in_category in data.items():
    for task, subjects_in_task in tasks_in_category.items():
        for subject in subjects_in_task:
            # one plot per unique category-task-subject combination
            plot_subject_task(category, task, subject, data, thresholds, save_dir="quality_metrics/plots/subject-task")

In [111]:
# combine subject-task plots in a canvas per subject. All same subject plots should be on the same canvas.

import os
import glob
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from collections import defaultdict

# Directory containing the individual subject-task plots
plots_dir = "quality_metrics/plots/subject-task"

# Group the images by subject; file names are "<category>-<subject>-<task>.png"
subject_plots = defaultdict(list)

# Top-level PNGs only (the pattern is not recursive)
png_files = glob.glob(os.path.join(plots_dir, "*.png"))

for file in png_files:
    # Drop the extension first so the task token does not end in ".png"
    filename = os.path.basename(file).removesuffix(".png")
    parts = filename.split("-")
    if len(parts) < 3:
        continue  # not a "<category>-<subject>-<task>" file name

    category, subject_id, task = parts[0], parts[1], parts[2]
    subject_plots[subject_id].append((file, category, task))

# One combined canvas per subject
output_dir = "quality_metrics/plots/subject-task/combined_subject-task_plots"
os.makedirs(output_dir, exist_ok=True)

for subject_id, plots in subject_plots.items():
    num_plots = len(plots)
    cols = min(num_plots, 4)  # at most 4 columns for readability
    rows = (num_plots + cols - 1) // cols  # enough rows to hold every plot

    # squeeze=False always yields a 2-D array, so flatten() is valid for any grid
    fig, axes = plt.subplots(rows, cols, figsize=(cols * 4, rows * 4), squeeze=False)
    axes = axes.flatten()

    # Canvases with one or two plots use a slightly lower title position
    fig.suptitle(f"{subject_id} across Tasks", fontsize=16, y=(0.92 if num_plots <= 2 else 0.95))

    # Order by category, then task, so the layout does not depend on glob order
    for ax, (file, category, task) in zip(axes, sorted(plots, key=lambda x: (x[1], x[2]))):
        img = mpimg.imread(file)
        ax.imshow(img)
        ax.axis("off")

    # Hide the unused panels
    for ax in axes[num_plots:]:
        ax.axis("off")

    fig.tight_layout(rect=[0, 0, 1, 1])
    output_path = os.path.join(output_dir, f"{subject_id}_combined.png")
    fig.savefig(output_path, dpi=300)
    plt.close(fig)

print(f"Combined plots saved in {output_dir}")

Combined plots saved in quality_metrics/plots/subject-task/combined_subject-task_plots


In [112]:
# combine task subject plots, canvas per task
import os
import glob
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from collections import defaultdict

# Directory containing the individual subject-task plots
plots_dir = "quality_metrics/plots/subject-task"

# Group the images by task; file names are "<category>-<subject>-<task>.png"
task_plots = defaultdict(list)

# Top-level PNGs only (the pattern is not recursive)
png_files = glob.glob(os.path.join(plots_dir, "*.png"))

for file in png_files:
    filename = os.path.basename(file).removesuffix(".png")
    parts = filename.split("-")
    if len(parts) < 3:
        continue  # not a "<category>-<subject>-<task>" file name

    category, subject_id, task = parts[0], parts[1], parts[2]
    task_plots[task].append((file, category, subject_id))

# One combined canvas per task
output_dir = "quality_metrics/plots/subject-task/combined_task-subject_plots"
os.makedirs(output_dir, exist_ok=True)

cols = 4  # fixed number of columns
for task, plots in task_plots.items():
    num_plots = len(plots)
    rows = ((num_plots - 1) // cols) + 1  # enough rows to hold every plot

    # squeeze=False always yields a 2-D array; without it a task with a single
    # plot produced a 1x4 array that was then wrapped in a list and failed on
    # ax.imshow.
    fig, axes = plt.subplots(rows, cols, figsize=(cols * 4, rows * 4), squeeze=False)
    fig.suptitle(f"{task} across Subjects", fontsize=16, y=0.9)
    axes = axes.flatten()

    # Order by category, then subject
    for ax, (file, category, subject) in zip(axes, sorted(plots, key=lambda x: (x[1], x[2]))):
        img = mpimg.imread(file)
        ax.imshow(img)
        ax.axis("off")

    # Hide the unused panels
    for ax in axes[num_plots:]:
        ax.axis("off")

    # Leave the top 10% of the canvas for the title
    fig.tight_layout(rect=[0, 0, 1, 0.9])
    output_path = os.path.join(output_dir, f"{task}_combined.png")
    fig.savefig(output_path, dpi=300)
    plt.close(fig)

print(f"Combined plots saved in {output_dir}")

Combined plots saved in quality_metrics/plots/subject-task/combined_task-subject_plots


In [113]:
# CREATE PDF USING CANVAS
# change the directories to create the pdfs
from PIL import Image
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

# Directory containing the combined subject-task canvases
combined_plots_dir = "quality_metrics/plots/subject-task/combined_subject-task_plots"
output_pdf = "plots_important/subject_task.pdf"
# PdfPages does not create missing directories
os.makedirs(os.path.dirname(output_pdf), exist_ok=True)

# All PNG images in the directory, in name order (one PDF page each)
image_files = sorted(glob.glob(os.path.join(combined_plots_dir, "*.png")))

with PdfPages(output_pdf) as pdf:
    for img_file in image_files:
        # The context manager closes the image file once it has been drawn
        with Image.open(img_file) as img:
            # Figure size in inches is derived from the pixel size (pixels / 100)
            fig, ax = plt.subplots(figsize=(img.width / 100, img.height / 100), dpi=300)
            ax.imshow(img)
        ax.axis("off")
        pdf.savefig(fig, bbox_inches="tight", dpi=300)
        plt.close(fig)  # free the figure before the next page

print(f"High-quality PDF saved at: {output_pdf}")

High-quality PDF saved at: plots_important/subject_task.pdf


In [114]:
# # reformat json for subject-metrics plotting
# # Input and output file paths
# input_json = "quality_metrics/quality_metrics_all_reformatted.json"
# output_json = "quality_metrics/quality_metrics_reformatted_subject-metrics.json"

# # Load the original JSON data
# with open(input_json, "r") as f:
#     data = json.load(f)

# # Reformat the data
# reformatted_data = {}

# for category, tasks in data.items():
#     for task, metrics in tasks.items():
#         for metric, subjects in metrics.items():
#             for subject, value in subjects.items():
#                 if category not in reformatted_data:
#                     reformatted_data[category] = {}
#                 if subject not in reformatted_data[category]:
#                     reformatted_data[category][subject] = {}
#                 if metric not in reformatted_data[category][subject]:
#                     reformatted_data[category][subject][metric] = {}
                
#                 reformatted_data[category][subject][metric][task] = value

# # Ensure the output directory exists
# os.makedirs("quality_metrics", exist_ok=True)

# # Save the reformatted JSON
# with open(output_json, "w") as f:
#     json.dump(reformatted_data, f, indent=4)

# print(f"Reformatted JSON saved to {output_json}")


In [115]:
import pandas as pd
import matplotlib.pyplot as plt

# plots subject-metric
def plot_subject_metric(subject, metric, data, thresholds, save_dir):
    """Scatter-plot one metric across the tasks of one subject and save a PNG.

    Values from the "mriqc_failed" group are plotted first, then those from
    "mriqc_passed"; a dashed vertical line separates them when both groups are
    present. When ``metric`` has an entry in ``thresholds`` the values are
    rescaled so that Low maps to -1 and High maps to +1. Points inside [-1, 1]
    are green, the rest red. If there is nothing to plot, no file is written.

    Args:
        subject (str): Subject ID; key under each category in ``data``.
        metric (str): Metric name; key under the subject.
        data (dict): Nested mapping ``category -> subject -> metric -> task -> value``.
        thresholds (dict): Metric name -> ``(low, high, median)``.
        save_dir (str): Output directory; created if missing.

    Returns:
        None. Writes ``<save_dir>/<subject>-<metric>.png`` when data is present.
    """
    os.makedirs(save_dir, exist_ok=True)

    tasks = []
    values = []
    categories = []

    # Collect failed runs first so they end up on the left of the plot
    for category in ("mriqc_failed", "mriqc_passed"):
        per_task = data.get(category, {}).get(subject, {}).get(metric, {})
        for task, value in per_task.items():
            tasks.append(task)
            values.append(value)
            categories.append(category)

    # Nothing to draw: the annotation code below indexes values[-1]
    if not values:
        print(f"No values for {subject} - {metric}; skipping plot")
        return

    # Rescale with Low/High from thresholds (the median is not used here)
    if metric in thresholds:
        low, high, _median = thresholds[metric]
        half_range = (high - low) / 2
        mid = (high + low) / 2
        values = [(v - mid) / half_range for v in values]

    # Green inside the expected range, red outside
    colors = ["green" if -1 <= v <= 1 else "red" for v in values]

    failed_count = categories.count("mriqc_failed")
    passed_count = categories.count("mriqc_passed")
    n_points = len(tasks)

    # Widen the figure with the number of tasks, but never below 8 inches
    fig_width = max(n_points * 0.5, 8)
    fig, ax = plt.subplots(figsize=(fig_width, 6))

    ax.scatter(range(n_points), values, c=colors, s=50)

    # Vertical separator only when both groups are present
    if failed_count > 0 and passed_count > 0:
        ax.axvline(x=failed_count - 0.5, color="black", linestyle="--")

    # Group labels along the bottom edge
    if failed_count >= 1:
        failed_xy = (-0.15, -1.9) if failed_count >= 4 else (0, -1.8)
        ax.text(failed_xy[0], failed_xy[1], "mriqc_failed", fontsize=8)
        if passed_count >= 1:
            ax.text(failed_count - 0.3, -1.9, "mriqc_passed", fontsize=8)
    else:
        ax.text(0, -1.9, "mriqc_passed", fontsize=8)

    ax.set_yticks([-2, -1, 0, 1, 2])

    ax.set_title(f"{subject} - {metric}", fontsize=20, pad=20, y=1)
    ax.set_ylabel("Z-Normalized Value", fontsize=12)
    ax.set_xlabel("Tasks", fontsize=12, loc="center")

    # Shaded band for the acceptable range (-1 to 1)
    ax.axhspan(-1, 1, color="gray", alpha=0.2)

    # Put the "within range" note on the opposite side of the last point
    label_x = n_points - 1
    within_y = -0.8 if values[-1] > 0 else 0.7
    ax.text(label_x, within_y, "within\nrange", ha="center", fontsize=8)
    ax.text(label_x, -1.2, "low", ha="center", fontsize=8)
    ax.text(label_x, 1.1, "high", ha="center", fontsize=8)

    # One tick per task, labelled with the task name
    ax.set_xticks(range(n_points))
    ax.set_xticklabels(tasks, rotation=45, ha="right")
    ax.set_ylim(-2, 2)

    # Empty scatters exist only to create the legend entries
    ax.scatter([], [], color="green", label="In range")
    ax.scatter([], [], color="red", label="Outside range")
    ax.legend(loc="upper right")

    # Explicit margins; a tight_layout() call here would be overridden by this
    # subplots_adjust, so it is not issued.
    fig.subplots_adjust(left=0.2, right=0.8, top=0.8, bottom=0.3)
    ax.margins(x=0.05)

    fig.savefig(os.path.join(save_dir, f"{subject}-{metric}.png"), dpi=300)
    plt.close(fig)

In [116]:
# Smoke test: draw a single subject-metric plot before generating the full set.
output_json = "quality_metrics/quality_metrics_reformatted_subject-metrics.json"
with open(output_json, "r") as json_file:
    data = json.load(json_file)  # json.load reads directly from the file handle
plot_subject_metric(
    "s608",
    "dvars_vstd",
    data,
    thresholds,
    save_dir="quality_metrics/plots/subject-metric",
)

In [117]:
output_json = "quality_metrics/quality_metrics_reformatted_subject-metrics.json"

with open(output_json, "r") as f:
    data = json.load(f)

# plot_subject_metric already merges both categories, so each (subject, metric)
# pair needs to be plotted once even if the subject appears under both.
# The comprehension also keeps its loop names local, so the notebook-level
# `metrics` set is not overwritten.
subject_metric_pairs = sorted(
    {
        (subject, metric)
        for subjects_in_category in data.values()
        for subject, metrics_of_subject in subjects_in_category.items()
        for metric in metrics_of_subject
    }
)

for subject, metric in subject_metric_pairs:
    plot_subject_metric(subject, metric, data, thresholds, save_dir="quality_metrics/plots/subject-metric")

In [118]:
# combine subject-metric plots in one canvas. All same subject plots should be on the same canvas.
import os
import glob
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from collections import defaultdict

# Directory containing the individual subject-metric plots
plots_dir = "quality_metrics/plots/subject-metric"

# Group the images by subject; file names are "<subject>-<metric>.png"
subject_plots = defaultdict(list)

# Top-level PNGs only (the pattern is not recursive)
png_files = glob.glob(os.path.join(plots_dir, "*.png"))

for file in png_files:
    filename = os.path.basename(file).removesuffix(".png")
    subject, sep, metric = filename.partition("-")
    if not sep:
        continue  # not a "<subject>-<metric>" file name
    subject_plots[subject].append((file, metric))

# One combined canvas per subject
output_dir = "quality_metrics/plots/subject-metric/combined_subject-metric_plots"
os.makedirs(output_dir, exist_ok=True)

cols = 3
for subject, plots in subject_plots.items():
    num_plots = len(plots)
    # Keep the 2x3 layout for up to six plots and add rows beyond that, so no
    # plot is silently dropped.
    rows = max(2, -(-num_plots // cols))

    # squeeze=False always yields a 2-D array, so flatten() is valid for any
    # number of plots (including a single one).
    fig, axes = plt.subplots(rows, cols, figsize=(cols * 4, rows * 4), squeeze=False)
    # y is a figure fraction: 0.92 keeps the title on the canvas (the previous
    # value of 92 placed it far above the saved image).
    fig.suptitle(f"{subject} across Metrics", fontsize=16, y=0.92)
    axes = axes.flatten()

    # Panels are ordered by metric name
    for ax, (file, metric) in zip(axes, sorted(plots, key=lambda x: x[1])):
        img = mpimg.imread(file)
        ax.imshow(img)
        ax.axis("off")

    # Hide the unused panels
    for ax in axes[num_plots:]:
        ax.axis("off")

    # Leave the top 5% of the canvas for the title
    fig.tight_layout(rect=[0, 0, 1, 0.95])
    output_path = os.path.join(output_dir, f"{subject}_combined.png")
    fig.savefig(output_path, dpi=300)
    plt.close(fig)

print(f"Combined plots saved in {output_dir}")

Combined plots saved in quality_metrics/plots/subject-metric/combined_subject-metric_plots


In [119]:
# CREATE PDF FOR SUBJECT-METRIC
from PIL import Image
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

# Directory containing the combined subject-metric canvases
combined_plots_dir = "quality_metrics/plots/subject-metric/combined_subject-metric_plots"
output_pdf = os.path.join("plots_important", "subject-metric.pdf")
# PdfPages does not create missing directories
os.makedirs(os.path.dirname(output_pdf), exist_ok=True)

# All PNG images in the directory, in name order (one PDF page each)
image_files = sorted(glob.glob(os.path.join(combined_plots_dir, "*.png")))

with PdfPages(output_pdf) as pdf:
    for img_file in image_files:
        # The context manager closes the image file once it has been drawn
        with Image.open(img_file) as img:
            # Figure size in inches is derived from the pixel size (pixels / 100)
            fig, ax = plt.subplots(figsize=(img.width / 100, img.height / 100), dpi=300)
            ax.imshow(img)
        ax.axis("off")
        pdf.savefig(fig, bbox_inches="tight", dpi=300)
        plt.close(fig)  # free the figure before the next page

print(f"High-quality PDF saved at: {output_pdf}")

High-quality PDF saved at: plots_important/subject-metric.pdf
