In [20]:
import os
import json
import csv
from bs4 import BeautifulSoup

# --- Input locations -------------------------------------------------------
# Folder holding the per-scan HTML reports.
mriqc_dir = "ds004636/derivatives/mriqc"
# CSV naming the metrics of interest (read via its "Metric" column).
csv_file = "quality_metrics/BOLD_quality_metrics.csv"
# Text files listing report filenames, one per line, for each category.
mriqc_failed_file = "mriqc_failed/mriqc_failed.txt"
all_included_file = "all_included/all_included.txt"

# --- Task names used to recognise the task in a report filename -------------
TASKS = {
    "ANT",
    "CCTHot",
    "DPX",
    "WATT3",
    "discountFix",
    "motorSelectiveStop",
    "stopSignal",
    "stroop",
    "surveyMedley",
    "twoByTwo",
}

In [33]:
# Function to parse filenames
def parse_filename(filename, tasks=None):
    """Split a report filename into its subject label and task name.

    Parameters
    ----------
    filename : str
        Underscore-separated filename whose first chunk carries the subject,
        e.g. ``"sub-s01_ses-1_task-ANT_run-1_bold.html"``.
    tasks : collection of str, optional
        Task names to recognise. Defaults to the module-level ``TASKS``.

    Returns
    -------
    tuple
        ``(subject, task)``. ``subject`` is the first chunk with ``"sub-"``
        removed; ``task`` is the recognised task name, or ``None`` when no
        known task appears in the filename.
    """
    known_tasks = TASKS if tasks is None else tasks

    parts = filename.split("_")
    subject = parts[0].replace("sub-", "")

    # Prefer an exact match on the "task-<label>" chunk. A plain substring
    # test over an unordered set can pick a different task from run to run
    # when another task name also occurs somewhere in the filename.
    for part in parts:
        if part.startswith("task-"):
            # Drop a trailing extension when the task chunk is the last one.
            label = part[len("task-"):].split(".")[0]
            if label in known_tasks:
                return subject, label

    # Fallback keeps the previous substring behaviour, but made deterministic:
    # the longest matching name wins, ties broken alphabetically.
    candidates = sorted(
        (name for name in known_tasks if name in filename),
        key=lambda name: (-len(name), name),
    )
    return subject, (candidates[0] if candidates else None)

def extract_metrics(html_path, metrics):
    """Collect the requested metrics from one HTML report.

    The report is searched for the element with id ``"other"`` and, after it,
    the table with id ``"iqms-table"``. Each table row with two cells is read
    as ``name, value``; each row with three cells is read as
    ``prefix, suffix, value`` and named ``"prefix_suffix"``. Rows with any
    other cell count are ignored.

    Parameters
    ----------
    html_path : str
        Path of the HTML report (read as UTF-8).
    metrics : container of str
        Metric names to keep; every other row is discarded.

    Returns
    -------
    dict
        Metric name -> value. Values are floats when the cell text parses as
        a number, otherwise the stripped cell text. Empty when the section or
        the table is missing (a message is printed in that case).
    """
    with open(html_path, "r", encoding="utf-8") as report:
        document = BeautifulSoup(report, "html.parser")

    anchor = document.find(id="other")
    if anchor is None:
        print(f" other table not found for {html_path}")
        return {}

    iqm_table = anchor.find_next("table", {"id": "iqms-table"})
    if iqm_table is None:
        print(f"iqms table not found for {html_path}")
        return {}

    extracted = {}
    for table_row in iqm_table.find_all("tr"):
        texts = [cell.text.strip() for cell in table_row.find_all("td")]
        if len(texts) not in (2, 3):
            continue

        # The last cell is the value; the one or two before it form the name.
        *name_parts, raw_value = texts
        key = "_".join(name_parts)
        if key not in metrics:
            continue

        try:
            extracted[key] = float(raw_value)
        except ValueError:
            # Non-numeric cells are kept verbatim.
            extracted[key] = raw_value

    return extracted

In [35]:

# Organize data into JSON, nested as category -> task -> subject -> {metric: value}
quality_data = {"mriqc_failed": {}, "all_included": {}}

# Read the metric names of interest from the CSV's "Metric" column (the same
# column plot_task_metric reads its bounds from). newline="" is how the csv
# module expects files to be opened so embedded newlines are parsed correctly.
with open(csv_file, "r", newline="") as f:
    metrics = {row["Metric"] for row in csv.DictReader(f)}

# Process both files
for category, txt_file in [("mriqc_failed", mriqc_failed_file), ("all_included", all_included_file)]:
    with open(txt_file, "r") as f:
        # Drop blank lines so they are not reported as missing files below.
        filenames = [line.strip() for line in f if line.strip()]

    for filename in filenames:
        html_path = os.path.join(mriqc_dir, filename)
        if not os.path.isfile(html_path):
            # Report instead of skipping silently, so gaps in the JSON are explainable.
            print(f"Skipping {filename}: no report found at {html_path}")
            continue

        subject, task = parse_filename(filename)
        if not task:
            print(f"Skipping {filename}: no known task name in filename")
            continue

        # One entry per (task, subject): a later report for the same pair
        # replaces an earlier one.
        quality_data[category].setdefault(task, {})[subject] = extract_metrics(html_path, metrics)

# Save to JSON file
with open("quality_metrics/quality_metrics.json", "w") as f:
    json.dump(quality_data, f, indent=4)

print("JSON file created successfully!")

JSON file created successfully!


In [37]:
def reformat_json(input_json, output_json):
    """Pivot the metrics JSON from per-subject to per-metric layout.

    Reads ``category -> task -> subject -> {metric: value}`` from
    ``input_json`` and writes ``category -> task -> metric -> {subject: value}``
    to ``output_json`` (indented by four spaces). Only the categories
    ``"mriqc_failed"`` and ``"all_included"`` are carried over; both keys are
    always present in the output, even when absent from the input.

    Parameters
    ----------
    input_json : str
        Path of the JSON file to read.
    output_json : str
        Path of the JSON file to write.
    """
    with open(input_json, "r") as src:
        nested = json.load(src)

    pivoted = {}
    for group in ("mriqc_failed", "all_included"):
        by_task = pivoted.setdefault(group, {})
        for task_name, per_subject in nested.get(group, {}).items():
            by_metric = by_task.setdefault(task_name, {})
            for subject_id, subject_metrics in per_subject.items():
                for metric_name, metric_value in subject_metrics.items():
                    by_metric.setdefault(metric_name, {})[subject_id] = metric_value

    with open(output_json, "w") as dst:
        json.dump(pivoted, dst, indent=4)

In [43]:
# File paths
input_json = "quality_metrics/quality_metrics.json"
output_json = "quality_metrics/quality_metrics_reformatted.json"
metrics_csv = "quality_metrics/BOLD_quality_metrics.csv"
output_dir = "quality_metrics/plots"

# Run functions
# Rebuild the per-metric JSON on every run so the plotting step never reads a
# stale or missing file after a kernel restart.
reformat_json(input_json, output_json)

In [42]:
import pandas as pd
import matplotlib.pyplot as plt

def _is_finite_number(value):
    """Return True for finite ints/floats; bools, strings, None, NaN and inf are rejected."""
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return False
    return value == value and value not in (float("inf"), float("-inf"))


def plot_task_metric(json_file, metrics_csv, output_dir):
    """Save one scatter plot per task/metric showing every subject's scaled value.

    ``json_file`` is expected in the layout
    ``category -> task -> metric -> {subject: value}`` with the categories
    ``"mriqc_failed"`` and ``"all_included"``. ``metrics_csv`` must provide the
    columns ``Metric``, ``Low``, ``High`` and ``Median``.

    Each value is scaled as ``(value - Median) / ((High - Low) / 2)``, so the
    shaded band from -1 to 1 marks the Low..High range when Median is its
    midpoint. Points are green when the raw value lies within
    ``[Low, High]`` and red otherwise.

    Both categories are drawn on the same figure: ``mriqc_failed`` subjects
    first, then ``all_included`` subjects, each group sorted by subject ID,
    with a dashed line between the two groups. Previously each category was
    plotted separately but saved under the same filename, so the first figure
    was always overwritten by the second.

    Skipped (without raising):
      * metrics that have no row in ``metrics_csv``;
      * metrics whose bounds are missing or give a non-positive range;
      * individual values that are not finite numbers (e.g. strings).

    Parameters
    ----------
    json_file : str
        Path of the per-metric JSON file.
    metrics_csv : str
        Path of the CSV holding the bounds for each metric.
    output_dir : str
        Folder for the PNG files; created if missing. Files are named
        ``"{task}-{metric} Plot.png"``.
    """
    os.makedirs(output_dir, exist_ok=True)

    with open(json_file, "r") as f:
        data = json.load(f)

    metrics_df = pd.read_csv(metrics_csv)
    metric_bounds = {
        row["Metric"]: (row["Low"], row["High"], row["Median"])
        for _, row in metrics_df.iterrows()
    }

    failed = data.get("mriqc_failed", {})
    included = data.get("all_included", {})

    # dict.fromkeys de-duplicates while keeping first-seen order.
    for task in dict.fromkeys([*failed, *included]):
        failed_metrics = failed.get(task, {})
        included_metrics = included.get(task, {})

        for metric in dict.fromkeys([*failed_metrics, *included_metrics]):
            if metric not in metric_bounds:
                continue  # Skip if metric info is missing

            low, high, median = metric_bounds[metric]
            half_range = (high - low) / 2
            # "not x > 0" is also true for NaN, so missing bounds are caught here.
            if pd.isna(median) or not half_range > 0:
                print(f"Skipping {task}-{metric}: unusable bounds (low={low}, high={high}, median={median})")
                continue

            failed_points, included_points = (
                sorted(
                    ((subj, val) for subj, val in group.get(metric, {}).items() if _is_finite_number(val)),
                    key=lambda item: item[0],
                )
                for group in (failed_metrics, included_metrics)
            )
            points = failed_points + included_points
            if not points:
                continue  # Nothing plottable for this task/metric

            subjects_list = [subj for subj, _ in points]
            z_scores = [(val - median) / half_range for _, val in points]
            # Classify on the raw value so rounding in the scaling cannot flip
            # a value that sits exactly on a bound.
            colors = ["green" if low <= val <= high else "red" for _, val in points]
            positions = list(range(len(points)))

            # Keep a minimum height so figures with few subjects stay readable.
            fig, ax = plt.subplots(figsize=(8, max(2.0, len(points) * 0.3)))
            ax.scatter(z_scores, positions, c=colors, edgecolors="black")
            ax.axvspan(-1, 1, color="gray", alpha=0.3)  # Shade normal range

            ax.set_yticks(positions)
            ax.set_yticklabels(subjects_list)
            n_failed = len(failed_points)
            if 0 < n_failed < len(points):
                # Halfway between the last failed and the first included subject.
                ax.axhline(y=n_failed - 0.5, color="black", linestyle="--")
            ax.set_xlabel("Z-Normalized Value")
            ax.set_ylabel("Subjects")
            ax.set_title(f"{task}-{metric} Plot")

            fig.savefig(os.path.join(output_dir, f"{task}-{metric} Plot.png"))
            plt.close(fig)

In [44]:
# Render the plots from the per-metric JSON into the plots folder.
plot_task_metric(
    json_file=output_json,
    metrics_csv=metrics_csv,
    output_dir=output_dir,
)