In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""Script: make fxw plot of distribution from FW summaries.

Created on Sun May 23 2021

@author: yoav
"""

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import concurrent.futures as cf
from itertools import product
import glob as glob
import xml.etree.ElementTree as ET
import re as re
from matplotlib import cm
import subprocess

In [2]:
def get_lam(keyable, key):
    if key in keyable:
        return keyable[key]
    key2 = key[:3] + key[4] + key[3] + key[5:]
    if key2 in keyable:
        return keyable[key2]
    return keyable[key] # raise KeyError

def data_from_files(metadata):
    """Get data from files.
    
    Extract data from the files in the fw subfolder of metadata["folder"]
    """

    datas = [[{} for w in metadata["file_ws"]] for f in metadata["file_fs"]]
    metas = [[{**metadata} for w in metadata["file_ws"]] for f in metadata["file_fs"]]
    for i,f in enumerate(metadata["file_fs"]):
        for j,w in enumerate(metadata["file_ws"]):
            meta = metas[i][j]
            meta["file_fs"] = f
            meta["file_ws"] = w
            if metadata["file_r"] is not None:
                r = metadata["file_r"]
                meta["folder"] = metadata["folder"] + f"/x000{f+8*w}_f{f:02}w{w:02}c00r00"
            else:
                meta["folder"] = metadata["folder"] + f"/f{f}w{w}"

    arg_generator = (
                     (datas[i[0]][i[1]], metas[i[0]][i[1]])
                     for i in 
                     product( range(len(metadata["file_fs"])), range(len(metadata["file_ws"])) )
                    )

    with cf.ThreadPoolExecutor() as ex:
        ex.map(tuple_args_ij_files, arg_generator)
    #for args in arg_generator:
    #    tuple_args_ij_files(args)


    return datas #and update metadata


def tuple_args_ij_files(arg_tuple):
    """Wrap single file function for map."""
    dist_from_ij(*arg_tuple)


def dist_from_ij(data, meta):
    """Get cluster distribution of a single folder."""
    #  start
    
    data["success"] = True
    try:
        # get simulation parameters
        timestep_vtu = glob.glob(meta["folder"] + "/timestep*.vtu")
        if not timestep_vtu:
            raise FileNotFoundError("no timesteps were found")
        # parse vtu as xml file
        tree = ET.parse(timestep_vtu[-1])
        root = tree.getroot()
        
        # directly extract the one tape, get f,w
        tape = root.findall('tape')[0].text
        f_regex = re.compile(r"\n(F=[\d.]+)")
        w_regex = re.compile(r"\n(w=[\d.]+)")
        true_f = re.findall(f_regex, tape)[0]
        true_w = re.findall(w_regex, tape)[0]
        data["f"] = float(true_f[2:])
        data["w"] = float(true_w[2:])

        
        ##number of vertex
        #n_vertices = root.findall('trisurf')[0].attrib["nvtx"]
        type_nodes = root.findall('.//*[@Name="type"]')
        if type_nodes:
            t_node = root.findall('.//*[@Name="spontaneous_curvature"]')[0]
            num_active = sum(int(x)&2==2 for x in t_node.text.strip().split())
        else:
            c_node = root.findall('.//*[@Name="spontaneous_curvature"]')[0]
            num_active = sum(float(x)>0 for x in c_node.text.strip().split())


    
        # now for the data
        pystat_names = ("No", "Volume", "Area", "lambda1",
                        "lambda2", "lambda3", "Nbw/Nb", "hbar",
                        "mean_cluster_size", "std_cluster_size",
                        "line_length", "asphericity", "gyration_radius",
                        "acylindricity", "lamdba1",
                        "lamdba2", "lamdba3"
                       )
        stat_names = ("Epoch","OuterLoop", "VertexMoveSucessRate",
                  "BondFlipSuccessRate"
                     )
        calculated_hist_names = ("cluster_size_dist")

    
        if meta["data_type"] in pystat_names:
            # get a main_statistics or pystatisics 
            # (regular statistics: output of trisurf)
            stat_csv = glob.glob(meta["folder"]+'/[mp]*.csv')[0]
            df = pd.read_csv(stat_csv)
            data["x"] = df["No"][meta["slice"]]
        
        elif meta["data_type"] in stat_names:
            # get stuff that only exists in statistics.csv
            df = pd.read_csv(meta["folder"]+'/statistics.csv')
            data["x"] = df["OuterLoop"][meta["slice"]]
    
        elif meta["data_type"] in calculated_hist_names:
            file_list=glob.glob(meta["folder"]+'/hist*.csv')
            hist = np.zeros([num_active,2], dtype=int)
            hist[:,0] = np.arange(1,num_active+1)
            # couldn't find any way to make pandas do this
            for file in file_list[meta["slice"]]:
                a = np.genfromtxt(file,
                                  skip_header=1,
                                  delimiter=',',
                                  dtype=int)
                hist[a[...,0]-1,1] += a[...,1]
            df = pd.DataFrame({"cluster_size":hist[...,0], "number_of_clusters":hist[...,1]/float(sum(hist[...,1]))})
            data["x"] = df["cluster_size"]
        

        # use options
        if meta["data_type"]=='cluster_size_dist':
            data["y"] = df["number_of_clusters"]
        
        elif meta["data_type"]=='asphericity':
            data["y"] = (get_lam(df,'lambda3')[meta["slice"]]
                        - 0.5*get_lam(df,'lambda2')[meta["slice"]]
                        - 0.5*get_lam(df,'lambda1')[meta["slice"]])
        
        elif meta["data_type"]=='gyration_radius':
            data["y"] = (get_lam(df,'lambda1')[meta["slice"]]
                        + get_lam(df,'lambda2')[meta["slice"]]
                        + get_lam(df,'lambda3')[meta["slice"]])
        
        elif meta["data_type"]=='acylindricity':
            data["y"] = (get_lam(df,'lambda2')[meta["slice"]]
                        - get_lam(df,'lambda1')[meta["slice"]])
    
        elif meta["data_type"] in {"lambda1", "lambda2", "lambda3", "lamdba1", "lamdba2", "lamdba3"}:
            data["y"] = get_lam(df,meta["data_type"])[meta["slice"]]
        else:

            data["y"] = df[meta["data_type"]][meta["slice"]]
    
        if meta["data_type"] in calculated_hist_names:
            f = data["y"]/(data["y"].sum())
            if meta["do_mean"]:
                data["mean"] = (f*data["x"]).sum()
            if meta["do_std"]:
                data["std"] = (f*(data["x"]-(f*data["x"]).sum())**2).sum()**0.5
            if meta["do_skew"]:
                data["skew"] = (f*( (data["x"]-(f*data["x"]).sum())**3 / ((f*(data["x"]-(f*data["x"]).sum())**2).sum())**1.5 )).sum()
            if meta["do_kurtosis"]:
                data["kurtosis"] = (f*( (data["x"]-(f*data["x"]).sum())**4 / ((f*(data["x"]-(f*data["x"]).sum())**2).sum())**2 )).sum()
        else:
            if meta["do_mean"]:
                data["mean"] = data["y"].mean()
            if meta["do_std"]:
                data["std"] = data["y"].std()
            if meta["do_skew"]:
                data["skew"] = data["y"].skew()
            if meta["do_kurtosis"]:
                data["kurtosis"] = data["y"].kurtosis() # please?
    except FileNotFoundError as e:
        data["y"] = np.nan
        data["x"] = np.nan
        data["success"] = False
        ## try and get a value for f,w
         # get simulation parameters
        timestep_vtu = glob.glob(meta["folder"] + "/*.vtu")
        if timestep_vtu: # found an alternative vtu
            tree = ET.parse(timestep_vtu[-1])
            root = tree.getroot()
            
            # directly extract the one tape, get f,w
            tape = root.findall('tape')[0].text
            f_regex = re.compile(r"\n(F=[\d.]+)")
            w_regex = re.compile(r"\n(w=[\d.]+)")
            true_f = re.findall(f_regex, tape)[0]
            true_w = re.findall(w_regex, tape)[0]
            data["f"] = float(true_f[2:])
            data["w"] = float(true_w[2:])
        else: # try simulation_parameter
            with open(meta["folder"]+"/simulation_parameter","r") as file:
                text = file.read()
                f_regex = re.compile(r"(f=[\d.]+)")
                w_regex = re.compile(r"(w=[\d.]+)")
                true_f = re.findall(f_regex, text)[0]
                true_w = re.findall(w_regex, text)[0]
                data["f"] = float(true_f[2:])
                data["w"] = float(true_w[2:])
                
                
            
    except Exception as e:
        data["e"] = e
        raise e


In [3]:
def plot_fxw(datas, metadata):
    """Plot fxw graphs.

    plot fxw graphs in a square, in a matrixlike form
    from a fxw list of datas
    """
    def dist_label_mean(x):
        return fr'$\mu = {x:.2f}$'

    def dist_label_std(x):
        return fr'$\sigma = {x:.2f}$'
    
    def filter_dict(given_dict, default_dict, irrelevant_keys_set):
        "filter a given dectionary to only include keys in the relevant_keys set, then add defaults"
        relevant_given = {key: value for key, value in given_dict.items() if key not in irrelevant_keys_set}
        for key in default_dict.keys():
            if key not in relevant_given:
                relevant_given[key]=default_dict[key]
        return relevant_given
            
    
    #def axis_range(min_val,max_val):
    #    mid = (max_val + min_val) /2
    #    dif = (max_val - min_val) /2
    #    return mid-1.05*dif, mid+1.05*dif

    f_range, w_range = list(range(len(datas))), list((range(len(datas[0]))))
    
    # ranges: matrix ij goes down with i, right with j
    # xy goes right with x and up with y- x=j, y=reverse(i)
    if metadata["order"] == "ij=fw":
        i_range, j_range = f_range, w_range
        ij_datas = [ [datas[i][j] for j in j_range] for i in i_range ]
        x_param, y_param = "w", "f"
        do_mat = True
    elif metadata["order"] == "ij=wf":
        i_range, j_range = w_range, f_range
        ij_datas = [ [datas[j][i] for j in j_range] for i in i_range ]
        x_param, y_param = "f", "w"
        do_mat = True
    elif metadata["order"] == "xy=fw":
        j_range, i_range = f_range, w_range
        ij_datas = [ [datas[j][i] for j in j_range] for i in reversed(i_range) ]
        x_param, y_param = "f", "w"
        do_mat = False
    elif metadata["order"] == "xy=wf":
        i_range, j_range = f_range, w_range
        ij_datas = [ [datas[i][j] for j in j_range] for i in reversed(i_range) ]
        x_param, y_param = "w", "f"
        do_mat = False

    
    if metadata["plot_type"] == "pcolor":
        #pcolor is a single mesh
        if "axe_scale" in metadata:
            plt.xscale(metadata["axe_scale"][0])
            plt.yscale(metadata["axe_scale"][1])
        # data key:
        if metadata["do_mean"]:
            key="mean"
        elif metadata["do_std"]:
            key="std"
        elif metadata["do_skew"]:
            key="skew"
        elif metadata["do_kurtosis"]:
            key="kurtosis"
        else:
            key="y"

        for datastrip in ij_datas:
            for data in datastrip:
                if not data["success"]:
                    data[key] = np.nan
        # not sure if pcolor follows matrix or xy coordinates
        if metadata["data_transform"]:
            mesh = [[metadata["data_transform"](data[key]) for data in datastrip ] for datastrip in ij_datas]
        else:
            mesh = [[data[key] for data in datastrip ] for datastrip in ij_datas]
        X = [[data[x_param] for data in datastrip ] for datastrip in ij_datas]
        Y = [[data[y_param] for data in datastrip ] for datastrip in ij_datas]

        
        
        default_mesh_params={"shading":"nearest"}
        irrelevant_mesh_keys={"bottom", "align","color"}
        plt.pcolormesh( X, Y, mesh,
                    **filter_dict(metadata["plot_kwargs"], 
                                       default_mesh_params,
                                       irrelevant_mesh_keys)
                    )
        plt.xlabel(x_param)
        plt.ylabel(y_param)
 
        if metadata["do_title"]:
            plt.title(f' FW plot: {metadata["data_type"]}')
            
        plt.xticks(X[0])
        plt.yticks([a[0] for a in Y])
        if do_mat:
            bottom, top = plt.ylim()
            plt.ylim(top, bottom)
        plt.colorbar()
        return True
        


    # make big figure
    plt.rcParams['figure.figsize'] = [10, 6.8] # [10,6.8] # [20,18] 4_lines: worked with [40, 6.8]
    if metadata["fig_size"]:
        plt.rcParams['figure.figsize'] = metadata["fig_size"]
    if "axe_share" in metadata:
        fig, axes = plt.subplots(nrows=len(i_range), ncols=len(j_range), sharex=metadata["axe_share"][0], sharey=metadata["axe_share"][1], num=metadata["data_type"])
    else:
        fig, axes = plt.subplots(nrows=len(i_range), ncols=len(j_range), num=metadata["data_type"])


    for i in i_range:
        for j in j_range:
            try:
                axe = axes[i, j]
            except TypeError:
                axe=axes
            data = ij_datas[i][j]
            if data["success"] == False:
                pass
            else:
            
                if "axe_scale" in metadata:
                    axe.set_xscale(metadata["axe_scale"][0])
                    axe.set_yscale(metadata["axe_scale"][1])
    
                if metadata["plot_type"] == "plot":
                    default_plot_params={}
                    irrelevant_plot_keys={"bottom", "cmap", "align"}
                    axe.plot(data["x"], data["y"],
                             **filter_dict(metadata["plot_kwargs"], 
                                           default_plot_params,
                                          irrelevant_plot_keys)
                            )
                elif metadata["plot_type"] == "bar":
                    default_bar_params={"snap":False}
                    irrelevant_bar_keys={"cmap",}
                    axe.bar(data["x"], data["y"],
                             **filter_dict(metadata["plot_kwargs"], 
                                           default_bar_params,
                                           irrelevant_bar_keys)
                           )
                elif metadata["plot_type"] == "stem":
                    default_stem_params={"markerfmt":",", "basefmt":","}
                    irrelevant_stem_keys={"color","cmap","align"}
                    axe.stem(data["x"], data["y"],
                            **filter_dict(metadata["plot_kwargs"], 
                                           default_stem_params,
                                           irrelevant_stem_keys)
                            )
                elif metadata["plot_type"] == "hist":
                    default_stem_params={}
                    irrelevant_stem_keys={"color","cmap","align"}
                    axe.hist(data["y"],
                             **{key: value for key,value in metadata["plot_kwargs"].items() 
                                if key not in {"cmap"}}
                            )
                
    
                
                if metadata["do_legend"]:
                    # custom legend
                    if metadata["do_mean"]:
                        axe.plot([], [], marker='.', color='red',
                                 label=dist_label_mean(data["mean"]))
                    if metadata["do_std"]:
                        axe.plot([], [], marker='.', color='red',
                                 label=dist_label_std(data["std"]))
                    if metadata["do_skew"]:
                        axe.plot([], [], marker='.', color='red',
                                 label=r'skew $' fr' = {data["skew"]:.2f}$')
                    if metadata["do_kurtosis"]:
                        axe.plot([], [], marker='.', color='red',
                                 label=r'kurtosis $' fr' = {data["kurtosis"]:.2f}$')
    
                                 
                if metadata["do_title"]:
                    if metadata["do_title_name"]:
                        axe.set_title(f' f={data["f"]}, w={data["w"]}: {metadata["data_type"]}')
                    else:
                        axe.set_title(f' f={data["f"]}, w={data["w"]}')
                # axe.set_aspect('equal', 'box')
    
                if metadata["do_legend"]:
                    axe.legend(numpoints=1, handlelength=0,
                               markerscale=0, handletextpad=0)

    plt.tight_layout()
    plt.show()
    return True

In [15]:

%matplotlib widget
"""Do main function.

does the thing
"""
main_loc = (r'/mnt/c/Users/yoavr/Desktop'
            r'/paraview_pipeline/hello_chemfarm'
            r'/FW_block_aggregated/timesteps'
            #r'/13_from_various_stability/4_from_pearling' # goes to 300, 4x4
            #r'/HIV_gag/2_attempt_2'
            #r'/HIV_gag/4_lines' # goes to 150
            #r'/HIV_gag/3_fixed' # 5x5, FW_block equivalent [2,6|5,9]
            #r'/bicurvatures/1_characterize'
            #r'/chemfarm_example'
           )
metadata={}
metadata["data_type"]= "cluster_size_dist" # lambda1,lambda2,lamdba3, Area, Volume, mean_cluster_size, asphericity, gyration_radius, acylindricity, cluster_size_dist
metadata["slice"]=slice(-50,None,None)
metadata["folder"]=main_loc
metadata["do_mean"]=True
metadata["do_std"]=True
metadata["do_skew"] = False
metadata["do_kurtosis"] = False
metadata["file_fs"] = [0,1,2,3,5,6,7]
metadata["file_ws"] = [0,1,3,4,5,6,7,8,9,10,11,12,13,14]
metadata["file_r"] = None
metadata["axe_share"] = (True, True) # sharex, sharey
metadata["order"] = "ij=wf" # ij=fw ij=wf xy=fw xy=wf
metadata["do_legend"] = True
metadata["do_title"] = True
metadata["do_title_name"] = False
metadata["plot_type"] = "bar" # pcolor*, plot, bar, hist, stem
metadata["axe_scale"] = "linear", "log" #xy:  "linear", "log", "symlog", "logit"
metadata["plot_kwargs"] = {"bottom": 0, "color": 'red', "cmap":cm.plasma, "align":'center'}
metadata["data_transform"] = None # lambda x: x*(x<600)+600*(x>600)
metadata["fig_size"] = [6,6] # [10,6.8] # [20,18] 4_lines: worked with [40, 6.8]

datas = data_from_files(metadata)
plot_fxw(datas, metadata)




Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

  plt.tight_layout()


True

In [16]:
fig = plt.gcf()

fig.savefig('FW_aggregate_giant_dist.png')



In [2]:
import glob as glob

In [37]:
datas[0][0]

{'success': True, 'e': IndexError('list index out of range')}

In [46]:
file_list = glob.glob(r'/mnt/c/Users/yoavr/Desktop'
            r'/paraview_pipeline/hello_chemfarm'
            r'/FW_block_aggregated/timesteps/f3w9/' 'histogram*.csv')
slc = slice(-20,-1)
max_cluster = 450
hist = np.zeros([450,2], dtype=int)
hist[:,0] = np.arange(1,451)
for file in file_list[slc]:
    a = np.genfromtxt(file,
                      skip_header=1,
                      delimiter=',',
                      dtype=int)
    hist[a[...,0]-1,1] += a[...,1]

In [56]:
timestep_vtu = glob.glob(r'/mnt/c/Users/yoavr/Desktop'
            r'/paraview_pipeline/hello_chemfarm'
            r'/FW_block_aggregated/timesteps/f3w9/' '*vtu')
tree = ET.parse(timestep_vtu[-1])
root = tree.getroot()
node = root.findall('.//*[@Name="type"]')
c_node = root.findall('.//*[@Name="spontaneous_curvature"]')[0]
num_vertex = sum(float(x)>0 for x in c_node.text.strip().split())
num_vertex

450

In [28]:
with open(r'/mnt/c/Users/yoavr/Desktop'
            r'/paraview_pipeline/hello_chemfarm'
            r'/bicurvatures/1_characterize/f1w1' '/simulation_parameters',"r") as file:
        text = file.read()
        
text
f_regex = re.compile(r"(f=[\d.]+)")
w_regex = re.compile(r"(w=[\d.]+)")
true_f = re.findall(f_regex, text)[0]
true_w = re.findall(w_regex, text)[0]
data["f"] = float(true_f[2:])
data["w"] = float(true_w[2:])

NameError: name 'data' is not defined

In [30]:
true_f

'f=0.1'

In [62]:
pd.DataFrame({"cluster_size":hist[:,0], "number_of_clusters":hist[:,1]})

Unnamed: 0,cluster_size,number_of_clusters
0,1,1
1,2,0
2,3,0
3,4,0
4,5,0
...,...,...
445,446,0
446,447,0
447,448,0
448,449,1


In [49]:
a={"hi":1,"bye":2}
b={"hi":3,"hey":4}

In [63]:
a.keys() | b.keys()

{'bye', 'hey', 'hi'}

In [67]:
for key in a.keys() | b.keys():
    print(a[key]) if key in a else print(b[key])

1
4
2


In [76]:
def filter_dict(given_dict, default_dict, relevant_keys_set):
        "filter a given dectionary to only include keys in the relevant_keys set, then add defaults"
        relevant_given = {key: value for key, value in given_dict.items() if key in relevant_keys_set}
        for key in default_dict.keys():
            if key not in relevant_given:
                relevant_given[key]=default_dict[key]
        return relevant_given

In [78]:
filter_dict(b,a,{"bye","hi"})

{'hi': 3, 'bye': 2}

In [82]:
a["banana"]

KeyError: 'banana'

In [17]:
pd.read_csv('/opt/workspace/thing.csv')

FileNotFoundError: [Errno 2] No such file or directory: '/opt/workspace/thing.csv'