# Generate a synthetic dataset

This notebook starts the process of generating a synthetic dataset from structural models that have already been created.

In [1]:
from core.vtk_utilities import *
from core.utilities import *
from core.widgets_utilities import *
from core.all_features2 import all_features2
from core.tk_utilities import *
import io

## Select files with the structural models 

Select the files that contain the structural models you want to have in your synthetic dataset. At least one input model is required.

There are 4 types of models, each with its own button to add files:

- Membrane files: list of membrane files to use in the application; these have the **mbs** extension
- Filament files: list of filament files to use in the application; these have the **hns** extension
- Macromolecule (protein) files: list of macromolecule files to use in the application; these have the **pns** extension
- Membrane protein files: list of membrane macromolecule files to use in the application; these have the **pms** extension

  
**Note:** you can select several (or all) files in a folder by holding the **'Ctrl'** key.

Remember to select only membrane proteins that you have already **aligned**; otherwise the application will not work correctly.


In [2]:
# Per-model-type path buffers handed to select_files() by the click handlers.
(selected_files_path_membrane,
 selected_files_path_helix,
 selected_files_path_proteins,
 selected_files_path_mproteins) = [], [], [], []

# Per-model-type file lists shown in the dropdowns and passed to the simulation.
MEMBRANES_LIST, HELIX_LIST, PROTEINS_LIST, MB_PROTEINS_LIST = [], [], [], []

def select_files_membrane(btn):
    """Click handler for the membrane button.

    Calls ``select_files`` with the membrane path buffer and the ``.mbs``
    extension, passes that buffer to ``add_files`` together with
    ``MEMBRANES_LIST``, then calls ``update_dropdown`` for the membrane dropdown.

    :param btn: the clicked button (unused).
    """
    # The module-level buffer is only read here, never rebound, so no
    # ``global`` declaration is required.
    picked_paths = selected_files_path_membrane
    select_files(picked_paths, ".mbs")
    add_files(MEMBRANES_LIST, picked_paths)
    update_dropdown(MEMBRANES_LIST, dropdown_membrane)
 
    
def select_files_helix(btn):
    """Click handler for the filament (helix) button.

    Calls ``select_files`` with the helix path buffer and the ``.hns``
    extension, passes that buffer to ``add_files`` together with
    ``HELIX_LIST``, then calls ``update_dropdown`` for the helix dropdown.

    :param btn: the clicked button (unused).
    """
    # Read-only use of the module-level buffer; no ``global`` needed.
    picked_paths = selected_files_path_helix
    select_files(picked_paths, ".hns")
    add_files(HELIX_LIST, picked_paths)
    update_dropdown(HELIX_LIST, dropdown_helix)


def select_files_proteins(btn):
    """Click handler for the macromolecule (protein) button.

    Calls ``select_files`` with the protein path buffer and the ``.pns``
    extension, passes that buffer to ``add_files`` together with
    ``PROTEINS_LIST``, then calls ``update_dropdown`` for the protein dropdown.

    :param btn: the clicked button (unused).
    """
    # Read-only use of the module-level buffer; no ``global`` needed.
    picked_paths = selected_files_path_proteins
    select_files(picked_paths, ".pns")
    add_files(PROTEINS_LIST, picked_paths)
    update_dropdown(PROTEINS_LIST, dropdown_proteins)


def select_file_mproteins(btn):
    """Click handler for the membrane-protein button.

    Calls ``select_files`` with the membrane-protein path buffer and the
    ``.pms`` extension, passes that buffer to ``add_files`` together with
    ``MB_PROTEINS_LIST``, then calls ``update_dropdown`` for the
    membrane-protein dropdown.

    :param btn: the clicked button (unused).
    """
    # Read-only use of the module-level buffer; no ``global`` needed.
    picked_paths = selected_files_path_mproteins
    select_files(picked_paths, ".pms")
    add_files(MB_PROTEINS_LIST, picked_paths)
    update_dropdown(MB_PROTEINS_LIST, dropdown_mproteins)


# Build the file-selection UI. widgets_add_app_files() returns eight objects,
# unpacked here as a (select button, dropdown) pair for each model type:
# membranes, helices (filaments), proteins and membrane proteins.
(select_file_button_membrane, dropdown_membrane,select_file_button_helix, dropdown_helix, select_file_button_proteins,
 dropdown_proteins, select_file_button_mproteins, dropdown_mproteins) = widgets_add_app_files()
# Attach one click handler per button; each handler picks files of the matching
# extension and refreshes the matching dropdown.
select_file_button_membrane.on_click(select_files_membrane)
select_file_button_helix.on_click(select_files_helix)
select_file_button_proteins.on_click(select_files_proteins)
select_file_button_mproteins.on_click(select_file_mproteins)

VBox(children=(Label(value='Types of structures:'), HBox(children=(Button(description='Select membrane files',…

## Sort the models

The selected models can be sorted to set the order in which they are simulated. Labels in the ground truth follow the order defined here, starting with label '1' ('0' is reserved for background).

In [3]:
def _reorder_selected_file(reorder, files, controls):
    """Apply a reorder function to the selected file and refresh its dropdown.

    Shared body of every ``up_file_*`` / ``down_file_*`` click handler.

    :param reorder: callable invoked as ``reorder(files, selected_value)`` and
        expected to return an ``(index, options)`` pair (``up_file_priority``
        or ``down_file_priority``).
    :param files: the file list of one model type.
    :param controls: the widget row of that model type; only the element at
        index 2 (the dropdown) is used.
    """
    dropdown = controls[2]
    index, options = reorder(files, dropdown.value)
    # An empty ``options`` leaves the dropdown untouched.
    if len(options) > 0:
        dropdown.options = options
        dropdown.value = options[index]


# The module-level lists and widget rows are looked up when a handler runs,
# not when it is defined, so *_WIDGETS may be created after these functions.

def up_file_m(b):
    """Move the selected membrane file up; ``b`` (the clicked button) is unused."""
    _reorder_selected_file(up_file_priority, MEMBRANES_LIST, MEMBRANES_WIDGETS)


def down_file_m(b):
    """Move the selected membrane file down; ``b`` (the clicked button) is unused."""
    _reorder_selected_file(down_file_priority, MEMBRANES_LIST, MEMBRANES_WIDGETS)


def up_file_p(b):
    """Move the selected protein file up; ``b`` (the clicked button) is unused."""
    _reorder_selected_file(up_file_priority, PROTEINS_LIST, PROTEINS_WIDGETS)


def down_file_p(b):
    """Move the selected protein file down; ``b`` (the clicked button) is unused."""
    _reorder_selected_file(down_file_priority, PROTEINS_LIST, PROTEINS_WIDGETS)


def up_file_mb(b):
    """Move the selected membrane-protein file up; ``b`` (the clicked button) is unused."""
    _reorder_selected_file(up_file_priority, MB_PROTEINS_LIST, MB_PROTEINS_WIDGETS)


def down_file_mb(b):
    """Move the selected membrane-protein file down; ``b`` (the clicked button) is unused."""
    _reorder_selected_file(down_file_priority, MB_PROTEINS_LIST, MB_PROTEINS_WIDGETS)


def up_file_h(b):
    """Move the selected helix file up; ``b`` (the clicked button) is unused."""
    _reorder_selected_file(up_file_priority, HELIX_LIST, HELIX_WIDGETS)


def down_file_h(b):
    """Move the selected helix file down; ``b`` (the clicked button) is unused."""
    _reorder_selected_file(down_file_priority, HELIX_LIST, HELIX_WIDGETS)

def update_dropdown_widget(widget, values):
    """Replace a dropdown's options and select the first one.

    :param widget: object exposing writable ``options`` and ``value`` attributes.
    :param values: the new options; when empty (falsy) the selection is
        cleared by setting ``value`` to ``None``.
    """
    widget.options = values
    if values:
        widget.value = values[0]
    else:
        widget.value = None

def on_membranes_list_change(btn):
    """Reload the membrane ordering dropdown from ``MEMBRANES_LIST`` (``btn`` unused)."""
    dropdown = MEMBRANES_WIDGETS[2]
    update_dropdown_widget(dropdown, MEMBRANES_LIST)


def on_helix_list_change(btn):
    """Reload the helix ordering dropdown from ``HELIX_LIST`` (``btn`` unused)."""
    dropdown = HELIX_WIDGETS[2]
    update_dropdown_widget(dropdown, HELIX_LIST)


def on_proteins_list_change(btn):
    """Reload the protein ordering dropdown from ``PROTEINS_LIST`` (``btn`` unused)."""
    dropdown = PROTEINS_WIDGETS[2]
    update_dropdown_widget(dropdown, PROTEINS_LIST)


def on_mb_proteins_list_change(btn):
    """Reload the membrane-protein ordering dropdown from ``MB_PROTEINS_LIST`` (``btn`` unused)."""
    dropdown = MB_PROTEINS_WIDGETS[2]
    update_dropdown_widget(dropdown, MB_PROTEINS_LIST)


# Second click handler on each file-selection button (the first was attached in
# the file-selection cell): it reloads the ordering dropdown of the same model type.
# NOTE(review): presumably these run after the file-selection handlers because
# they are registered later -- confirm against ipywidgets' callback ordering.
select_file_button_membrane.on_click(on_membranes_list_change)
select_file_button_helix.on_click(on_helix_list_change)
select_file_button_proteins.on_click(on_proteins_list_change)
select_file_button_mproteins.on_click(on_mb_proteins_list_change)

# One widget row per model type. As used in this cell, index 0 and 1 are the
# clickable up/down buttons and index 2 is the dropdown holding the selected file.
(MEMBRANES_WIDGETS, HELIX_WIDGETS, PROTEINS_WIDGETS, MB_PROTEINS_WIDGETS)= widgets_change_order([MEMBRANES_LIST, HELIX_LIST, PROTEINS_LIST, MB_PROTEINS_LIST])

# Wire the up/down buttons of each row to the matching reorder handlers.
MEMBRANES_WIDGETS[0].on_click(up_file_m)
MEMBRANES_WIDGETS[1].on_click(down_file_m)
HELIX_WIDGETS[0].on_click(up_file_h)
HELIX_WIDGETS[1].on_click(down_file_h)
PROTEINS_WIDGETS[0].on_click(up_file_p)
PROTEINS_WIDGETS[1].on_click(down_file_p)
MB_PROTEINS_WIDGETS[0].on_click(up_file_mb)
MB_PROTEINS_WIDGETS[1].on_click(down_file_mb)

VBox(children=(HBox(children=(Button(description='↑ Up selected file', style=ButtonStyle()), Button(descriptio…

## Execution

Once the list of models to be used in the simulation is defined, select the parameters related to the simulation and image acquisition, then press **Exec** to run the simulation.

In [5]:
## DEF_PATH = os.path.realpath(os.getcwd() + '/../data') +  '/../data_generated/polnet_test'
def generate_voi_shape():
    """Return the current values of the three VOI-shape widgets as a 3-tuple.

    The values are read in the order ``voi_shape1``, ``voi_shape2``, ``voi_shape3``.
    """
    shape_widgets = (voi_shape1, voi_shape2, voi_shape3)
    return tuple(entry.value for entry in shape_widgets)

def generate_tilts_angs():
    """Build the tilt-angle sequence from the start/end/step widgets.

    Returns a ``range`` object. As with any ``range``, the end value itself
    is not included.
    """
    start = widget_min.value
    stop = widget_max.value
    step = widget_paso.value  # "paso" is used as the range step
    return range(start, stop, step)

def generate_voi_off():
    """Return the six VOI-offset widget values as three ``(a, b)`` pairs.

    Widgets 1-2 form the first pair, 3-4 the second and 5-6 the third.
    """
    first_pair = (voi_off_widget_1.value, voi_off_widget_2.value)
    second_pair = (voi_off_widget_3.value, voi_off_widget_4.value)
    third_pair = (voi_off_widget_5.value, voi_off_widget_6.value)
    return (first_pair, second_pair, third_pair)



def exec_app(btn):
    """Click handler for the Exec button: run the simulation with the current settings.

    Everything runs inside ``with output_widget``. The output directory is
    resolved first through ``check_dir``. If all four model lists are empty,
    ``window_exec_app_failed`` is called; otherwise ``all_features2`` is
    invoked with the current widget values.

    NOTE(review): the only ``DEF_PATH`` assignment visible in this notebook is
    commented out; presumably one of the ``core.*`` star imports provides it --
    confirm, otherwise this handler raises ``NameError``.

    :param btn: the clicked button (unused).
    """
    with output_widget:
        # Resolved before the model check, whether or not a run follows.
        path = check_dir(widget_out_dir.selected_path, DEF_PATH)

        any_model = MEMBRANES_LIST or HELIX_LIST or PROTEINS_LIST or MB_PROTEINS_LIST
        if not any_model:
            window_exec_app_failed()
            return

        # Positional arguments: their order is the contract with all_features2.
        all_features2(
            ntomos_widget.value,
            generate_voi_shape(),
            path,
            generate_voi_off(),
            voi_size_widget.value,
            mmer_tries_widget.value,
            pmer_tries_widget.value,
            MEMBRANES_LIST,
            HELIX_LIST,
            PROTEINS_LIST,
            MB_PROTEINS_LIST,
            surf_dec_widget.value,
            generate_tilts_angs(),
            [detector_snr_widget_low.value, detector_snr_widget_high.value],
            malign_mn_widget.value,
            malign_mx_widget.value,
            malign_sg_widget.value,
        )


# Build the execution UI. widgets_exec_app() returns 24 objects, unpacked in this
# fixed order: output-directory chooser, tomogram count, three VOI-shape inputs,
# six VOI-offset inputs, voxel size, monomer/polymer tries, surface decimation,
# three misalignment inputs, the SNR low/high pair, the tilt start/end/step
# inputs (widget_paso is used as the range step) and the Exec button.
(widget_out_dir, ntomos_widget, voi_shape1, voi_shape2, voi_shape3, voi_off_widget_1, voi_off_widget_2, voi_off_widget_3, voi_off_widget_4, 
voi_off_widget_5, voi_off_widget_6,voi_size_widget, mmer_tries_widget, 
pmer_tries_widget, surf_dec_widget, malign_mn_widget, malign_mx_widget, malign_sg_widget, 
detector_snr_widget_low, detector_snr_widget_high, widget_min, widget_max, widget_paso, exec_button) = widgets_exec_app()

# exec_app runs inside `with output_widget:`, so this Output widget must exist
# before the button can be clicked.
# NOTE(review): `widgets` and `display` are not imported explicitly in this
# notebook; presumably they come from the core.* star imports -- confirm.
output_widget = widgets.Output()

exec_button.on_click(exec_app)
display(output_widget)

FileChooser(path='D:\tfg\polnet\gui', filename='', title='Select where you want to save the output files:', sh…

IntText(value=1, description='N_TOMOS (number of tomograms in the dataset):', layout=Layout(width='380px'), st…

HBox(children=(BoundedIntText(value=400, description='VOI_SHAPE (Tomogram shape, voxels):', layout=Layout(widt…

HBox(children=(BoundedIntText(value=4, description='VOI_OFF (Empty halo, voxels):', layout=Layout(width='250px…

BoundedFloatText(value=10.0, description='VOI_VOXEL_SIZE (Voxel size, voxels/A):', layout=Layout(width='350px'…

BoundedFloatText(value=20.0, description='MMER_TRIES (Maximun number of tries for monomers):', layout=Layout(w…

BoundedFloatText(value=100.0, description='PMER_TRIES (Maximun number of tries for polymers):', layout=Layout(…

BoundedFloatText(value=0.9, description='SURF_DEC (Decimation for surface respresentation, [0, 1]):', layout=L…

BoundedFloatText(value=1.0, description='MALIGN_MN (Micrograph miss-alginment mean, pixels):', layout=Layout(w…

BoundedFloatText(value=1.5, description='MALIGN_MX (Micrograph miss-alginment max, pixels):', layout=Layout(wi…

BoundedFloatText(value=0.2, description='MALIGN_SG (Micrograph miss-alginment sigma, pixels):', layout=Layout(…

HBox(children=(BoundedFloatText(value=1.0, description='DETECTOR_SNR (Micrographs SNR range):', layout=Layout(…

HBox(children=(BoundedIntText(value=-60, description='TILT_ANGS (Degrees; start, end, step):', layout=Layout(w…

Button(description='Exec', style=ButtonStyle())

Output()