In [14]:
import pandas as pd
import json
import numpy as np
import os
from tqdm import tqdm
import sys
import os
import numpy as np
from polnet.utils import *
from polnet import lio
from polnet import tem
from polnet import poly as pp
import shutil
import random
import time
from polnet.stomo import (
    SynthTomo,
    SetTomos,
)

In [15]:

def get_absolute_paths(parent_dir):
    """
    Get absolute paths of all directories inside a given directory.
    
    Parameters:
        parent_dir (str): Path to the parent directory.
    
    Returns:
        list: A list of absolute paths of subdirectories.
    """
    return [os.path.abspath(os.path.join(parent_dir, d)) for d in os.listdir(parent_dir) if os.path.isdir(os.path.join(parent_dir, d))]




In [16]:

def generate_tomograms(out_base_dir, simulation_dirs, n_tomos=5, tilt_range=(-60, 60, 3), 
                       detector_snr=[1.0, 2.0], simulation_threshold=float("inf"), save_micrographs=False):
    simulation_index = 0
    for sim_dir in sorted(simulation_dirs):
        if simulation_index > simulation_threshold:
            break
        # Check if the simulation directory exists
        if not os.path.exists(sim_dir):
            print(f"Simulation directory {sim_dir} does not exist. Skipping.")
            continue
        # Check if the simulation directory is empty
        if not os.listdir(sim_dir):
            print(f"Simulation directory {sim_dir} is empty. Skipping.")
            continue
        print(f"Processing simulation directory: {sim_dir}")
        tom_dir = os.path.join(sim_dir, "tomos")
        if not os.path.exists(sim_dir):
            raise FileNotFoundError("Tomogram directory not found.")
        sys.path.append(os.path.dirname(sim_dir))
        
        tem_dir = os.path.join(sim_dir, "tem")
        if not os.path.exists(tem_dir):
            raise FileNotFoundError("TEM directory not found.") 
        for tomod_id in range(n_tomos):
            print("GENERATING TOMOGRAM NUMBER:", tomod_id)
            hold_time = time.time()
            # Create a separate output directory for each tomogram inside the simulation directory
            tomo_output_dir = os.path.join(out_base_dir, "ExperimentRuns", f"tomogram_{simulation_index}_{tomod_id}")
            os.makedirs(tomo_output_dir, exist_ok=True)
            
            # File paths
            tomo_den_out = os.path.join(tom_dir, f"tomo_den_{tomod_id}.mrc")
            tomo_lbls_out = os.path.join(tom_dir, f"tomo_lbls_{tomod_id}.mrc")
            poly_den_out = os.path.join(tom_dir, f"poly_den_{tomod_id}.vtp")
            poly_skel_out = os.path.join(tom_dir, f"poly_skel_{tomod_id}.vtp")
            
            if not all(os.path.exists(f) for f in [tomo_den_out, tomo_lbls_out, poly_den_out, poly_skel_out]):
                raise FileNotFoundError("One or more required input files are missing.")
            
            synth_tomo = SynthTomo()
            synth_tomo.set_den(tomo_den_out)
            synth_tomo.set_poly(poly_den_out)
            
            # TEM for 3D reconstructions
            temic = tem.TEM(tem_dir)
            vol = lio.load_mrc(tomo_den_out)
            temic.gen_tilt_series_imod(vol, np.arange(*tilt_range), ax="Y")
            temic.add_mics_misalignment(1, 1.5, 0.2)
            
            # Apply SNR noise
            if detector_snr:
                snr = round(random.uniform(detector_snr[0], detector_snr[1]), 2) if len(detector_snr) > 1 else detector_snr[0]
                temic.add_detector_noise(snr)
            
            temic.invert_mics_den()
            temic.set_header(data="mics", p_size=(10, 10, 10))
            temic.recon3D_imod()
            temic.set_header(data="rec3d", p_size=(10, 10, 10), origin=(0, 0, 0))
            
            out_mics = os.path.join(tomo_output_dir, f"tomo_mics_{tomod_id}_snr{snr}.mrc")
            out_tomo_rec = os.path.join(tomo_output_dir, f"tomo_rec_{tomod_id}_snr{snr}.mrc")
            
            # Conditionally save the micrographs
            if save_micrographs:
                shutil.copyfile(os.path.join(tem_dir, "out_micrographs.mrc"), out_mics)
            
            shutil.copyfile(os.path.join(tem_dir, "out_rec3d.mrc"), out_tomo_rec)
            
            synth_tomo.set_mics(out_mics if save_micrographs else None)
            synth_tomo.set_tomo(out_tomo_rec)
            
            print(f"Tomogram {tomod_id} processed in {time.time() - hold_time:.2f} seconds.")
        simulation_index += 1
        print(f"Simulation {simulation_index} processed.")
    print("Successfully terminated all simulations.")

"""simulation_dirs = get_absolute_paths("/Users/yusufberkoruc/Desktop/Master_thesis/polnet/data_simulated")
out_base_dir = "/Users/yusufberkoruc/Desktop/Master_thesis/polnet/train/static"
generate_tomograms(out_base_dir,simulation_dirs, n_tomos=5)
"""

'simulation_dirs = get_absolute_paths("/Users/yusufberkoruc/Desktop/Master_thesis/polnet/data_simulated")\nout_base_dir = "/Users/yusufberkoruc/Desktop/Master_thesis/polnet/train/static"\ngenerate_tomograms(out_base_dir,simulation_dirs, n_tomos=5)\n'

In [17]:

def quaternion_to_matrix(q1, q2, q3, q4):
    """Convert quaternion to a 4x4 transformation matrix."""
    # Normalize quaternion
    norm = np.sqrt(q1**2 + q2**2 + q3**2 + q4**2)
    q1, q2, q3, q4 = q1 / norm, q2 / norm, q3 / norm, q4 / norm

    # Create rotation matrix
    rotation_matrix = np.array([
        [1 - 2*(q2**2 + q3**2), 2*(q1*q2 - q3*q4), 2*(q1*q3 + q2*q4), 0],
        [2*(q1*q2 + q3*q4), 1 - 2*(q1**2 + q3**2), 2*(q2*q3 - q1*q4), 0],
        [2*(q1*q3 - q2*q4), 2*(q2*q3 + q1*q4), 1 - 2*(q1**2 + q2**2), 0],
        [0, 0, 0, 1]
    ])
    return rotation_matrix.tolist()

def csv_to_json(csv_file, json_directory, labels_table):
    # Read CSV file with pandas (using tab separator)
    df = pd.read_csv(csv_file, sep='\t')

    # Read labels table to create a mapping from Code to protein names
    labels_df = pd.read_csv(labels_table, sep='\t')
    code_to_protein = dict(zip(labels_df['LABEL'], labels_df['MODEL']))
    get_name = lambda x: x.split("/")[1].split(".")[0]
    code_to_protein = {k: get_name(v) for k, v in code_to_protein.items()}  # Convert keys to strings
    # Filter rows where Type is either 'SAWCL' or 'Mb-SAWLC'
    df_filtered = df[df['Type'].isin(['SAWLC', 'Mb-SAWLC'])]

    # Group by the 'Code' column to create separate JSON files for each protein
    grouped = df_filtered.groupby('Label')

    # Create the JSON directory if it doesn't exist
    os.makedirs(json_directory, exist_ok=True)

    # Iterate over each group (protein code)
    for label, group in grouped:
        # Get the protein name from the mapping
        protein_name = code_to_protein.get(label, str(label))  # Use code as fallback if not found

        # Initialize JSON structure for this protein
        json_data = {
            "pickable_object_name": protein_name,  # Use the protein name
            "user_id": "curation",
            "session_id": "0",
            "run_name": "TS_5_4",
            "voxel_spacing": None,
            "unit": "angstrom",
            "points": [],
            "trust_orientation": True
        }

        # Iterate over rows in the group
        for _, row in group.iterrows():
            # Extract relevant fields
            x = float(row['X'])
            y = float(row['Y'])
            z = float(row['Z'])
            instance_id = int(row['Label'])  # Use Label as instance_id
            q1 = float(row['Q1'])
            q2 = float(row['Q2'])
            q3 = float(row['Q3'])
            q4 = float(row['Q4'])

            # Convert quaternion to transformation matrix
            transformation = quaternion_to_matrix(q1, q2, q3, q4)

            # Add point to JSON
            json_data['points'].append({
                "location": {"x": x, "y": y, "z": z},
                "transformation_": transformation,
                "instance_id": instance_id
            })

        # Define the output JSON file path
        json_file = os.path.join(json_directory, f"{protein_name}.json")

        # Write JSON to file
        with open(json_file, mode='w') as file:
            json.dump(json_data, file, indent=4)

        #print(f"Created JSON file for protein '{protein_name}' at {json_file}")



In [18]:
def main(in_csv_list, out_dir, csv_dir_list,filter_types, labels_table):
    """
    Main function to split a CSV file by density tomograms, filter by Type, and convert to JSON.

    Parameters:
        in_csv_list (list): List of paths to the input CSV files.
        out_dir (str): Path to the output directory where split CSVs and JSON files will be saved.
        csv_dir_list (list): List of directories where split CSVs will be stored.
        filter_types (list): List of Type values to filter by.
        labels_table (str): Path to the labels table CSV file.
    """
    # Create the output directory if it doesn't exist
    os.makedirs(out_dir, exist_ok=True)
    simulation_index = 0
    for in_csv,csv_dir in zip(in_csv_list,csv_dir_list):
        # Create a directory for the CSV files if it doesn't exist
        os.makedirs(csv_dir, exist_ok=True)
        # Load the input CSV file into a DataFrame
        df = pd.read_csv(in_csv, sep='\t')
        df = df.drop(columns=['Tomo3D', 'Micrographs'], errors='ignore')
        
        # Group the DataFrame by the 'Density' column
        grouped = df.groupby('Density')

        # Iterate over each group and save to a separate CSV file
        for density, group in tqdm(grouped):
            density_csv = density.split("/")[-1].split(".")[0]
            density = density.split("/")[-1].split(".")[0].split("_")[-1]
            # Check if the density is empty or None
            if density == None or density == "":
                continue
            # Filter by 'Type' column if it exists
            """if 'Type' in group.columns:
                group = group[group['Type'].isin(filter_types)]"""
            # Construct the output file path
            
            csv_file_path = os.path.join(csv_dir, f'{density_csv}.csv')  # Specify the file name
            group.to_csv(csv_file_path, sep='\t', index=False)          
            # Create a JSON directory for each CSV file    
            json_output_dir = os.path.join(out_dir,"ExperimentRuns", f"tomogram_{simulation_index}_{density}","Picks")
            
            csv_to_json(csv_file_path, json_output_dir, labels_table)
        simulation_index += 1
        print(f"Simulation {simulation_index} processed.")
    print("Successfully terminated all simulations.")

In [19]:

def get_tomos_motif_list_paths(master_dir):
    """
    Get the absolute paths of 'tomos_motif_list.csv' from all subdirectories in a master directory.
    
    Parameters:
        master_dir (str): Path to the master directory containing subdirectories.
    
    Returns:
        list: A list of absolute paths to 'tomos_motif_list.csv' files in each subdirectory.
    """
    motif_paths = []
    
    # Iterate through all subdirectories in the master directory
    for subdir, _, files in os.walk(master_dir):
        if 'tomos_motif_list.csv' in files:
            # Construct the absolute path for the csv file
            motif_paths.append(os.path.abspath(os.path.join(subdir, 'tomos_motif_list.csv')))
    
    return motif_paths



In [24]:
in_csv_list = get_tomos_motif_list_paths("/Users/yusufberkoruc/Desktop/Master_thesis/polnet/data_simulated")
in_csv_list = sorted(in_csv_list)
print(in_csv_list)
out_dir = "/Users/yusufberkoruc/Desktop/Master_thesis/polnet/train/overlay"
csv_dir_list = get_absolute_paths("/Users/yusufberkoruc/Desktop/Master_thesis/polnet/data_simulated")
csv_dir_list = [os.path.join(d, "csv") for d in csv_dir_list]
print(csv_dir_list)
filter_types = ['SAWLC', 'Mb-SAWLC']
labels_table = "/Users/yusufberkoruc/Desktop/Master_thesis/polnet/data_simulated/all_v1/labels_table.csv"
main(in_csv_list, out_dir, csv_dir_list,filter_types, labels_table)

['/Users/yusufberkoruc/Desktop/Master_thesis/polnet/data_simulated/all_v1/tomos_motif_list.csv', '/Users/yusufberkoruc/Desktop/Master_thesis/polnet/data_simulated/all_v2/tomos_motif_list.csv', '/Users/yusufberkoruc/Desktop/Master_thesis/polnet/data_simulated/all_v3/tomos_motif_list.csv', '/Users/yusufberkoruc/Desktop/Master_thesis/polnet/data_simulated/all_v4/tomos_motif_list.csv']
['/Users/yusufberkoruc/Desktop/Master_thesis/polnet/data_simulated/all_v4/csv', '/Users/yusufberkoruc/Desktop/Master_thesis/polnet/data_simulated/all_v3/csv', '/Users/yusufberkoruc/Desktop/Master_thesis/polnet/data_simulated/all_v2/csv', '/Users/yusufberkoruc/Desktop/Master_thesis/polnet/data_simulated/all_v1/csv']


100%|██████████| 5/5 [01:11<00:00, 14.34s/it]


Simulation 1 processed.


100%|██████████| 5/5 [01:04<00:00, 12.81s/it]


Simulation 2 processed.


100%|██████████| 5/5 [01:07<00:00, 13.44s/it]


Simulation 3 processed.


100%|██████████| 5/5 [01:08<00:00, 13.61s/it]


Simulation 4 processed.
Successfully terminated all simulations.


In [None]:
import os
import re

def delete_tomo_mics_files(base_dir):
    """
    Deletes files with the 'tomo_mics_...' naming structure in the given directory and its subdirectories.

    Parameters:
        base_dir (str): Path to the base directory containing tomogram directories.
    """
    # Regular expression to match 'tomo_mics_...' file names
    pattern = re.compile(r"^tomo_mics_.*\.mrc$")

    # Walk through the directory and its subdirectories
    for root, _, files in os.walk(base_dir):
        for file in files:
            # Check if the file matches the pattern
            if pattern.match(file):
                file_path = os.path.join(root, file)
                try:
                    os.remove(file_path)
                    print(f"Deleted: {file_path}")
                except Exception as e:
                    print(f"Failed to delete {file_path}: {e}")

# Specify the base directory
base_dir = "/Users/yusufberkoruc/Desktop/Master_thesis/polnet/train/static/ExperimentRuns"

# Call the function
delete_tomo_mics_files(base_dir)