# Extraction and storage of the FISH images from the .lif files

Images taken during the last FISH experiment (2020), on the SP8 Leica microscope.

## Import the python libraries

In [None]:
%matplotlib inline

# To import the files
import glob
from pathlib import Path

# To read the images from the lif files
import read_lif

# To store the extracted data into a dataframe
import pandas as pd
import numpy as np

# To plot the images
import matplotlib.pyplot as plt
import seaborn as sns

## Read the files

In [None]:
# glob returns matches in an arbitrary, filesystem-dependent order; sort them
# so that anything numbered from this list is reproducible between runs.
files = sorted(glob.glob('/Volumes/Seagate/eth/0_Leica_SP8/*/*.lif'))

In [None]:
len(files)

In [None]:
%%time

# Walk through every .lif file, every series it contains and every channel of
# each series. Each (file, series, channel) gets a running integer id that keys
# both its metadata (property_dict) and its 2D frame (frame_dict).
property_dict = {}
frame_dict = {}

frame_count = 0

for lif_path in files:
    reader = read_lif.Reader(lif_path)
    # Prefix the file name with the name of its parent folder
    lif_location = Path(lif_path)
    reader_name = f'{lif_location.parent.name}_{lif_location.name}'

    for serie in reader.getSeries():
        channel_nbr = len(serie.getChannels())
        shape = serie.getFrameShape()
        nbr_frame = serie.getNbFrames()
        serie_name = serie.getName()

        for channel_index in range(channel_nbr):
            # Read the frame first so that nothing is recorded if it fails
            frame = serie.getFrame2D(channel=channel_index)

            property_dict[frame_count] = [
                reader_name, serie_name, channel_index, shape, nbr_frame,
            ]
            frame_dict[frame_count] = frame

            frame_count += 1

print(f'frame_count:{frame_count}')

In [None]:
len(property_dict.items())

In [None]:
len(frame_dict.items())

## Convert the dictionary into a dataframe

In [None]:
# One row per entry of property_dict; the dictionary keys become the index.
property_columns = ['file', 'serie', 'channel', 'shape_ZXY', 'nbr_frames']
df = pd.DataFrame.from_dict(property_dict, orient='index', columns=property_columns)

In [None]:
def md(s):
    """Return the entries of ``s`` that are strictly greater than 1.

    ``s`` is converted to a NumPy array first, so any array-like is accepted.
    The result is a NumPy array (possibly empty) that keeps the original order.
    """
    values = np.array(s)
    larger_than_one = values > 1
    return values[larger_than_one]

In [None]:
# Keep only the entries larger than 1 of each stored shape, spread them over
# one column per remaining entry, and name the first two shape_X / shape_Y.
filtered_shapes = df['shape_ZXY'].apply(md)
df2 = filtered_shapes.apply(pd.Series).rename(
    columns={0: 'shape_X', 1: 'shape_Y'}
)

In [None]:
df2

In [None]:
# Attach the per-dimension columns row by row (index-on-index merge), then
# drop the original shape column they were derived from.
merged = df.merge(df2, left_index=True, right_index=True)
df = merged.drop(columns=['shape_ZXY'])

In [None]:
df

In [None]:
df.info()

In [None]:
df['shape_X'].unique()

In [None]:
df['shape_Y'].unique()

In [None]:
# Store the dataframe to parquet.
# to_parquet does not create missing folders, so make sure the target exists.
Path('parquets').mkdir(parents=True, exist_ok=True)
df.to_parquet('parquets/image_information.parquet')

In [None]:
# Read the image information back from the parquet file into a dataframe
image_info = pd.read_parquet('parquets/image_information.parquet')

In [None]:
image_info

## Image storage to numpy

In [None]:
# Group the rows by image size: each (shape_X, shape_Y) pair is mapped to the
# integer positions of its rows in image_info.
rows_by_shape = image_info.groupby(['shape_X', 'shape_Y'])
shape_dict = rows_by_shape.indices

In [None]:
len(shape_dict)

In [None]:
# Group keys, in the iteration order of shape_dict
key_list = list(shape_dict)

# For each group, look up its frames in frame_dict; the i-th inner list
# belongs to key_list[i].
image_array_lists = [
    [frame_dict[v] for v in values]
    for values in shape_dict.values()
]

In [None]:
# np.save does not create missing folders, so make sure the target exists.
Path('image_arrays').mkdir(parents=True, exist_ok=True)

# One .npy file per group, named "<group position>_images_<key values joined by _>.npy"
for i, (k, image_list) in enumerate(zip(key_list, image_array_lists)):
    shape_tag = "_".join(map(str, k))
    np.save(f'image_arrays/{i}_images_{shape_tag}.npy', image_list)

#### Check that all the arrays have been saved

In [None]:
# Total number of rows across all shape groups
nbr_tot_arrays = sum(len(shape_dict[k]) for k in key_list)

nbr_tot_arrays

In [None]:
# List the .npy files present in the image_arrays folder
image_arrays = glob.glob('image_arrays/*.npy')

In [None]:
image_arrays

In [None]:
# Reload every listed file, keyed by its file name without the extension
arrays = {Path(npy_path).stem: np.load(npy_path) for npy_path in image_arrays}

# Sum of the first-axis lengths of all reloaded arrays
image_count = sum(len(stack) for stack in arrays.values())

image_count

In [None]:
arrays.keys()

In [None]:
len(arrays)