# Dataframe containing the information for FISH image analysis

Images taken during the last FISH experiment (2020), on the SP8 Leica microscope.  
Informations that will allow to compare the different images between them.

## Import the python libraries

In [15]:
# To import the files
import glob
from pathlib import Path

# To read the images from the lif files
import read_lif

# To read the dataframes
import pandas as pd
import numpy as np

## Load the dataframes

In [16]:
# Read the image dataframe parquet file:
image_info = pd.read_parquet('parquets/image_information.parquet')

In [17]:
image_info.groupby(['file', 'serie'])[['channel']].count().rename(columns={'channel' : 'channel_count'})

Unnamed: 0_level_0,Unnamed: 1_level_0,channel_count
file,serie,Unnamed: 2_level_1
200304_200304_Rev10a2_GFP_Lforms.lif,Series018,4
200304_200304_Rev10a2_GFP_Lforms.lif,Series019,4
200304_200304_Rev10a2_GFP_Lforms.lif,Series020,4
200304_200304_Rev10a2_GFP_Lforms.lif,w7_p1_form632_sp100_z5,4
200304_200304_Rev10a2_GFP_Lforms.lif,w7_p2_form3144_sp100_z1,4
...,...,...
200316_Sample_6.lif,Image008,4
200316_Sample_6.lif,Image009,4
200316_Sample_6.lif,Image010,4
200316_Sample_6.lif,Image011,4


In [18]:
image_info = image_info.merge(_, left_on=['file', 'serie'], right_index=True, how='left')

In [19]:
image_info.head(10)

Unnamed: 0,file,serie,channel,nbr_frames,shape_X,shape_Y,channel_count
0,200316_Sample_10.lif,Image001,0,1,512,512,4
1,200316_Sample_10.lif,Image001,1,1,512,512,4
2,200316_Sample_10.lif,Image001,2,1,512,512,4
3,200316_Sample_10.lif,Image001,3,1,512,512,4
4,200316_Sample_10.lif,Image002,0,1,512,512,4
5,200316_Sample_10.lif,Image002,1,1,512,512,4
6,200316_Sample_10.lif,Image002,2,1,512,512,4
7,200316_Sample_10.lif,Image002,3,1,512,512,4
8,200316_Sample_10.lif,Image003,0,1,512,512,4
9,200316_Sample_10.lif,Image003,1,1,512,512,4


In [20]:
image_info.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1188 entries, 0 to 1187
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   file           1188 non-null   object
 1   serie          1188 non-null   object
 2   channel        1188 non-null   int64 
 3   nbr_frames     1188 non-null   int64 
 4   shape_X        1188 non-null   int64 
 5   shape_Y        1188 non-null   int64 
 6   channel_count  1188 non-null   int64 
dtypes: int64(5), object(2)
memory usage: 74.2+ KB


In [21]:
image_info['channel_count'].unique()

array([4])

In [22]:
# Read the image metadata dataframe parquet file:
image_meta = pd.read_pickle('../metadata_leica_files/parquets_and_pickles/combined_metadata.pickle')

In [23]:
image_meta.head(10)

Unnamed: 0,FileName,Image.@Name,Image.@UniqueID,Channel.@DataType,Channel.@ChannelTag,Channel.@Resolution,Channel.@NameOfMeasuredQuantity,Channel.@Min,Channel.@Max,Channel.@Unit,...,ATLConfocalSettingDefinition.AotfList.Aotf.LaserLineSetting.@SequenceIndex,ATLConfocalSettingDefinition.AotfList.Aotf.LaserLineSetting.@LineDeactivationFlags,ATLConfocalSettingDefinition.AotfList.Aotf.LaserLineSetting.@IsLineChecked,ATLConfocalSettingDefinition.AotfList.Aotf.LaserLineSetting.@OutCheckedIntensity,ATLConfocalSettingDefinition.AotfList.Aotf.LaserLineSetting.@SuppressionMode,ATLConfocalSettingDefinition.AotfList.Aotf.LaserLineSetting.@IsVisible,ATLConfocalSettingDefinition.AotfList.Aotf.LaserLineSetting.@CanDoFastModulation,ATLConfocalSettingDefinition.AotfList.Aotf.LaserLineSetting,ATLConfocalSettingDefinition.AotfList.Aotf.BeamRoute.BeamPosition,_merge
0,200316_Sample_10.lif,Image001,c976e627-677d-11ea-a94d-002432168676,0,0,8,,0.0,255.0,,...,0.0,False,True,False,-1.0,True,True,,,both
1,200316_Sample_10.lif,Image001,c976e627-677d-11ea-a94d-002432168676,0,0,8,,0.0,255.0,,...,0.0,False,True,False,-1.0,True,True,,,both
2,200316_Sample_10.lif,Image001,c976e627-677d-11ea-a94d-002432168676,0,0,8,,0.0,255.0,,...,,False,False,False,,False,False,"[{'@LaserLine': '458', '@IntensityDev': '0', '...","[{'@BeamPositionLevel': '0', '@BeamPosition': ...",both
3,200316_Sample_10.lif,Image001,c976e627-677d-11ea-a94d-002432168676,0,0,8,,0.0,255.0,,...,,False,False,False,,False,False,"[{'@LaserLine': '458', '@IntensityDev': '0', '...","[{'@BeamPositionLevel': '0', '@BeamPosition': ...",both
4,200316_Sample_10.lif,Image001,c976e627-677d-11ea-a94d-002432168676,0,0,8,,0.0,255.0,,...,,,,,,,,,,left_only
5,200316_Sample_10.lif,Image001,c976e627-677d-11ea-a94d-002432168676,0,0,8,,0.0,255.0,,...,,,,,,,,,,left_only
6,200316_Sample_10.lif,Image001,c976e627-677d-11ea-a94d-002432168676,0,0,8,,0.0,255.0,,...,,False,False,False,,False,False,"[{'@LaserLine': '458', '@IntensityDev': '0', '...","[{'@BeamPositionLevel': '0', '@BeamPosition': ...",both
7,200316_Sample_10.lif,Image001,c976e627-677d-11ea-a94d-002432168676,0,0,8,,0.0,255.0,,...,,False,False,False,,False,False,"[{'@LaserLine': '458', '@IntensityDev': '0', '...","[{'@BeamPositionLevel': '0', '@BeamPosition': ...",both
8,200316_Sample_10.lif,Image002,d0505376-677d-11ea-a94d-002432168676,0,0,8,,0.0,255.0,,...,0.0,False,True,False,-1.0,True,True,,,both
9,200316_Sample_10.lif,Image002,d0505376-677d-11ea-a94d-002432168676,0,0,8,,0.0,255.0,,...,0.0,False,True,False,-1.0,True,True,,,both


In [24]:
image_meta.info(float)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2695 entries, 0 to 2694
Data columns (total 255 columns):
 #   Column                                                                                                       Dtype   
---  ------                                                                                                       -----   
 0   FileName                                                                                                     object  
 1   Image.@Name                                                                                                  object  
 2   Image.@UniqueID                                                                                              object  
 3   Channel.@DataType                                                                                            object  
 4   Channel.@ChannelTag                                                                                          Int64   
 5   Channel.@Resolution       

In [25]:
image_meta['Dimension.@Length'][462]

3.6904670000000004e-05

In [26]:
image_meta['Dimension.@DimID'][462]

1

In [27]:
image_meta['ATLConfocalSettingDefinition.@ScanSpeed'][462]

400.0

In [28]:
image_meta['ATLConfocalSettingDefinition.@Zoom'][462]

5.0000127156898895

In [29]:
image_meta['ATLConfocalSettingDefinition.@PixelDwellTime'][462]

2.5625e-06

In [30]:
image_meta['ATLConfocalSettingDefinition.DetectorList.Detector.@Gain'][462]

600.404364080262

In [31]:
image_meta['ATLConfocalSettingDefinition.DetectorList.Detector.@Offset'][462]

0.0

In [32]:
image_meta.iloc[:,164:172]

Unnamed: 0,ATLConfocalSettingDefinition.DetectorList.Detector.@Name,ATLConfocalSettingDefinition.DetectorList.Detector.@Type,ATLConfocalSettingDefinition.DetectorList.Detector.@ScanType,ATLConfocalSettingDefinition.DetectorList.Detector.@Channel,ATLConfocalSettingDefinition.DetectorList.Detector.@ChannelName,ATLConfocalSettingDefinition.DetectorList.Detector.@IsActive,ATLConfocalSettingDefinition.DetectorList.Detector.@Gain,ATLConfocalSettingDefinition.DetectorList.Detector.@Offset
0,PMT 3,PMT,Internal,3,Channel 3,False,600.289921,0.000000
1,PMT 3,PMT,Internal,3,Channel 3,False,600.289921,0.000000
2,HyD4 SMD,HyD,Internal,4,Channel 4,False,350.666885,-0.006667
3,HyD4 SMD,HyD,Internal,4,Channel 4,False,350.666885,-0.006667
4,PMT Trans,PMT,TLD,100,Transmission Channel,False,350.537881,0.000000
...,...,...,...,...,...,...,...,...
2690,PMT 5,PMT,Internal,5,Channel 5,False,962.195773,0.000000
2691,PMT 5,PMT,Internal,5,Channel 5,False,962.195773,0.000000
2692,PMT 5,PMT,Internal,5,Channel 5,False,962.195773,0.000000
2693,PMT 5,PMT,Internal,5,Channel 5,False,962.195773,0.000000


In [33]:
image_meta.iloc[:,14:21]

Unnamed: 0,Dimension.@DimID,Dimension.@NumberOfElements,Dimension.@Origin,Dimension.@Length,Dimension.@Unit,Dimension.@BitInc,Dimension.@BytesInc
0,1,512,-0.000007,0.000037,m,0,1.0
1,2,512,0.000012,0.000037,m,0,512.0
2,1,512,-0.000007,0.000037,m,0,1.0
3,2,512,0.000012,0.000037,m,0,512.0
4,1,512,-0.000007,0.000037,m,0,1.0
...,...,...,...,...,...,...,...
2690,1,1872,-0.000043,0.000110,m,0,1.0
2691,2,1872,-0.000010,0.000110,m,0,1872.0
2692,2,1872,-0.000010,0.000110,m,0,1872.0
2693,2,1872,-0.000010,0.000110,m,0,1872.0


In [34]:
image_meta.iloc[:,3:14]

Unnamed: 0,Channel.@DataType,Channel.@ChannelTag,Channel.@Resolution,Channel.@NameOfMeasuredQuantity,Channel.@Min,Channel.@Max,Channel.@Unit,Channel.@LUTName,Channel.@IsLUTInverted,Channel.@BytesInc,Channel.@BitInc
0,0,0,8,,0.0,255.0,,Blue,False,0.0,0
1,0,0,8,,0.0,255.0,,Blue,False,0.0,0
2,0,0,8,,0.0,255.0,,Green,False,262144.0,0
3,0,0,8,,0.0,255.0,,Green,False,262144.0,0
4,0,0,8,,0.0,255.0,,Gray,False,524288.0,0
...,...,...,...,...,...,...,...,...,...,...,...
2690,0,0,8,,0.0,255.0,,Red,False,10513152.0,0
2691,0,0,8,,0.0,255.0,,Red,False,10513152.0,0
2692,0,0,8,,0.0,255.0,,Red,False,10513152.0,0
2693,0,0,8,,0.0,255.0,,Red,False,10513152.0,0


### Add LUT channel to the `image_info` dataframe

In [35]:
# Channel related data + file name + image name and id
channel_meta = image_meta.iloc[:,0:14]

In [36]:
channel_meta.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2695 entries, 0 to 2694
Data columns (total 14 columns):
 #   Column                           Non-Null Count  Dtype   
---  ------                           --------------  -----   
 0   FileName                         2695 non-null   object  
 1   Image.@Name                      2680 non-null   object  
 2   Image.@UniqueID                  2680 non-null   object  
 3   Channel.@DataType                2680 non-null   object  
 4   Channel.@ChannelTag              2680 non-null   Int64   
 5   Channel.@Resolution              2680 non-null   Int64   
 6   Channel.@NameOfMeasuredQuantity  2680 non-null   object  
 7   Channel.@Min                     2680 non-null   float64 
 8   Channel.@Max                     2680 non-null   float64 
 9   Channel.@Unit                    2680 non-null   object  
 10  Channel.@LUTName                 2680 non-null   category
 11  Channel.@IsLUTInverted           2695 non-null   bool    
 12  Channe

In [37]:
# Remove the duplicated rows that are due to the differente dimensions (2 dimensions for each channel)
channel_meta.drop_duplicates(inplace=True, ignore_index=True)

In [38]:
# Drop the row for which there is no image and no image ID
channel_meta.dropna(axis=0, how='all', subset=['Image.@Name', 'Image.@UniqueID', 'Channel.@LUTName'], inplace=True)

In [39]:
# Select the column containing relevant information (LUT information)
lut_meta = channel_meta[['FileName', 'Image.@Name', 'Image.@UniqueID', 'Channel.@LUTName']].reset_index(drop=True)

In [40]:
lut_meta.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1188 entries, 0 to 1187
Data columns (total 4 columns):
 #   Column            Non-Null Count  Dtype   
---  ------            --------------  -----   
 0   FileName          1188 non-null   object  
 1   Image.@Name       1188 non-null   object  
 2   Image.@UniqueID   1188 non-null   object  
 3   Channel.@LUTName  1188 non-null   category
dtypes: category(1), object(3)
memory usage: 29.3+ KB


In [41]:
lut_meta.head(3)

Unnamed: 0,FileName,Image.@Name,Image.@UniqueID,Channel.@LUTName
0,200316_Sample_10.lif,Image001,c976e627-677d-11ea-a94d-002432168676,Blue
1,200316_Sample_10.lif,Image001,c976e627-677d-11ea-a94d-002432168676,Green
2,200316_Sample_10.lif,Image001,c976e627-677d-11ea-a94d-002432168676,Gray


In [42]:
# Merge the LUT information with the image dataframe (dataframe built while extracting the images)
image_info = image_info.merge(lut_meta, how='outer', left_index=True, right_index=True, indicator=True)

In [43]:
image_info.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1188 entries, 0 to 1187
Data columns (total 12 columns):
 #   Column            Non-Null Count  Dtype   
---  ------            --------------  -----   
 0   file              1188 non-null   object  
 1   serie             1188 non-null   object  
 2   channel           1188 non-null   int64   
 3   nbr_frames        1188 non-null   int64   
 4   shape_X           1188 non-null   int64   
 5   shape_Y           1188 non-null   int64   
 6   channel_count     1188 non-null   int64   
 7   FileName          1188 non-null   object  
 8   Image.@Name       1188 non-null   object  
 9   Image.@UniqueID   1188 non-null   object  
 10  Channel.@LUTName  1188 non-null   category
 11  _merge            1188 non-null   category
dtypes: category(2), int64(5), object(5)
memory usage: 144.7+ KB


In [44]:
image_info['_merge'].unique()

[both]
Categories (1, object): [both]

In [45]:
image_info['file_match'] = np.where(image_info['file'] == image_info['FileName'], 'True', 'False')

In [46]:
image_info['file_match'].unique()

array(['True'], dtype=object)

In [47]:
image_info['serie_match'] = np.where(image_info['serie'] == image_info['Image.@Name'], 'True', 'False')

In [48]:
image_info['serie_match'].unique()

array(['True'], dtype=object)

In [49]:
image_info = image_info.drop(['file_match', 'serie_match', 'FileName', 'Image.@Name', '_merge'], axis=1)

In [50]:
image_info.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1188 entries, 0 to 1187
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype   
---  ------            --------------  -----   
 0   file              1188 non-null   object  
 1   serie             1188 non-null   object  
 2   channel           1188 non-null   int64   
 3   nbr_frames        1188 non-null   int64   
 4   shape_X           1188 non-null   int64   
 5   shape_Y           1188 non-null   int64   
 6   channel_count     1188 non-null   int64   
 7   Image.@UniqueID   1188 non-null   object  
 8   Channel.@LUTName  1188 non-null   category
dtypes: category(1), int64(5), object(3)
memory usage: 124.9+ KB


In [51]:
image_info.shape_X.value_counts()

512     744
632     220
3144    184
600      12
1408      8
2480      8
1872      4
2608      4
320       4
Name: shape_X, dtype: int64

### Add physical length information to the `image_info` dataframe

In [52]:
# '+' concatenates the values of the two lists
dimension_meta = image_meta.iloc[:, list(range(0,3)) + list(range(10,11)) + list(range(14,21))]

In [53]:
dimension_meta.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2695 entries, 0 to 2694
Data columns (total 11 columns):
 #   Column                       Non-Null Count  Dtype   
---  ------                       --------------  -----   
 0   FileName                     2695 non-null   object  
 1   Image.@Name                  2680 non-null   object  
 2   Image.@UniqueID              2680 non-null   object  
 3   Channel.@LUTName             2680 non-null   category
 4   Dimension.@DimID             2680 non-null   Int64   
 5   Dimension.@NumberOfElements  2680 non-null   Int64   
 6   Dimension.@Origin            2680 non-null   float64 
 7   Dimension.@Length            2680 non-null   float64 
 8   Dimension.@Unit              2680 non-null   object  
 9   Dimension.@BitInc            2680 non-null   Int64   
 10  Dimension.@BytesInc          2680 non-null   float64 
dtypes: Int64(3), category(1), float64(3), object(4)
memory usage: 322.3+ KB


In [54]:
# Drop duplicated rows
dimension_meta.drop_duplicates(inplace=True, ignore_index=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [55]:
# Drop the row for which there is no image and no image ID
dimension_meta.dropna(axis=0, how='all', subset=['Image.@Name', 'Image.@UniqueID', 'Channel.@LUTName'], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [56]:
dimension_meta.head(3)

Unnamed: 0,FileName,Image.@Name,Image.@UniqueID,Channel.@LUTName,Dimension.@DimID,Dimension.@NumberOfElements,Dimension.@Origin,Dimension.@Length,Dimension.@Unit,Dimension.@BitInc,Dimension.@BytesInc
0,200316_Sample_10.lif,Image001,c976e627-677d-11ea-a94d-002432168676,Blue,1,512,-7e-06,3.7e-05,m,0,1.0
1,200316_Sample_10.lif,Image001,c976e627-677d-11ea-a94d-002432168676,Blue,2,512,1.2e-05,3.7e-05,m,0,512.0
2,200316_Sample_10.lif,Image001,c976e627-677d-11ea-a94d-002432168676,Green,1,512,-7e-06,3.7e-05,m,0,1.0


In [57]:
# Select only rows concerning the dimension 1 == X
dimension_meta_X = dimension_meta.loc[dimension_meta['Dimension.@DimID'] == 1].add_suffix('_X')

In [58]:
# Select only rows concerning the dimension 2 == Y
dimension_meta_Y = dimension_meta.loc[dimension_meta['Dimension.@DimID'] == 2].add_suffix('_Y')

In [59]:
# Merge the the two dimension-meta dataframe
dimension_meta = dimension_meta_X.merge(dimension_meta_Y, how='outer', left_on=['FileName_X', 'Image.@Name_X', 'Image.@UniqueID_X', 'Channel.@LUTName_X'], 
                       right_on=['FileName_Y', 'Image.@Name_Y', 'Image.@UniqueID_Y', 'Channel.@LUTName_Y'], indicator=True)

In [60]:
dimension_meta['_merge'].unique()

[both]
Categories (1, object): [both]

In [61]:
dimension_meta['file_match'] = np.where(dimension_meta['FileName_X'] == dimension_meta['FileName_Y'], 'True', 'False')

In [62]:
dimension_meta['file_match'].unique()

array(['True'], dtype=object)

In [63]:
dimension_meta['serie_match'] = np.where(dimension_meta['Image.@Name_X'] == dimension_meta['Image.@Name_Y'], 'True', 'False')

In [64]:
dimension_meta['serie_match'].unique()

array(['True'], dtype=object)

In [65]:
dimension_meta['id_match'] = np.where(dimension_meta['Image.@UniqueID_X'] == dimension_meta['Image.@UniqueID_Y'], 'True', 'False')

In [66]:
dimension_meta['id_match'].unique()

array(['True'], dtype=object)

In [67]:
dimension_meta['channel_match'] = np.where(dimension_meta['Channel.@LUTName_X'] == dimension_meta['Channel.@LUTName_Y'], 'True', 'False')

In [68]:
dimension_meta['channel_match'].unique()

array(['True'], dtype=object)

In [69]:
# Drop duplicated and verification columns
dimension_meta.drop(['FileName_Y', 'Image.@Name_Y', 'Image.@UniqueID_Y', 'Channel.@LUTName_Y', 'file_match', 'serie_match', 'id_match', 
                                      'channel_match', '_merge'], axis=1, inplace=True)

In [70]:
# Select the columns of interest -- > drop the other columns
dimension_meta.drop(['Dimension.@Origin_X', 'Dimension.@BitInc_X', 'Dimension.@BytesInc_X', 'Dimension.@Origin_Y', 'Dimension.@BitInc_Y',
                     'Dimension.@BytesInc_Y'], axis=1, inplace=True)

In [71]:
dimension_meta.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1188 entries, 0 to 1187
Data columns (total 12 columns):
 #   Column                         Non-Null Count  Dtype   
---  ------                         --------------  -----   
 0   FileName_X                     1188 non-null   object  
 1   Image.@Name_X                  1188 non-null   object  
 2   Image.@UniqueID_X              1188 non-null   object  
 3   Channel.@LUTName_X             1188 non-null   category
 4   Dimension.@DimID_X             1188 non-null   Int64   
 5   Dimension.@NumberOfElements_X  1188 non-null   Int64   
 6   Dimension.@Length_X            1188 non-null   float64 
 7   Dimension.@Unit_X              1188 non-null   object  
 8   Dimension.@DimID_Y             1188 non-null   Int64   
 9   Dimension.@NumberOfElements_Y  1188 non-null   Int64   
 10  Dimension.@Length_Y            1188 non-null   float64 
 11  Dimension.@Unit_Y              1188 non-null   object  
dtypes: Int64(4), category(1), float64(

In [72]:
image_info.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1188 entries, 0 to 1187
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype   
---  ------            --------------  -----   
 0   file              1188 non-null   object  
 1   serie             1188 non-null   object  
 2   channel           1188 non-null   int64   
 3   nbr_frames        1188 non-null   int64   
 4   shape_X           1188 non-null   int64   
 5   shape_Y           1188 non-null   int64   
 6   channel_count     1188 non-null   int64   
 7   Image.@UniqueID   1188 non-null   object  
 8   Channel.@LUTName  1188 non-null   category
dtypes: category(1), int64(5), object(3)
memory usage: 124.9+ KB


In [73]:
# Merge the LUT information with the image dataframe (dataframe built while extracting the images)
image_info = image_info.merge(dimension_meta, how='outer', left_on=['file', 'serie', 'Image.@UniqueID', 'Channel.@LUTName'], 
                       right_on=['FileName_X', 'Image.@Name_X', 'Image.@UniqueID_X', 'Channel.@LUTName_X'], indicator=True)

In [74]:
image_info['_merge'].unique()

[both]
Categories (1, object): [both]

In [75]:
image_info['file_match'] = np.where(image_info['file'] == image_info['FileName_X'], 'True', 'False')

In [76]:
image_info['file_match'].unique()

array(['True'], dtype=object)

In [77]:
image_info['serie_match'] = np.where(image_info['serie'] == image_info['Image.@Name_X'], 'True', 'False')

In [78]:
image_info['serie_match'].unique()

array(['True'], dtype=object)

In [79]:
image_info['id_match'] = np.where(image_info['Image.@UniqueID'] == image_info['Image.@UniqueID_X'], 'True', 'False')

In [80]:
image_info['id_match'].unique()

array(['True'], dtype=object)

In [81]:
image_info['channel_match'] = np.where(image_info['Channel.@LUTName'] == image_info['Channel.@LUTName_X'], 'True', 'False')

In [82]:
image_info['channel_match'].unique()

array(['True'], dtype=object)

In [83]:
# Drop duplicated and verification columns
image_info.drop(['FileName_X', 'Image.@Name_X', 'Image.@UniqueID_X', 'Channel.@LUTName_X', 'file_match', 'serie_match', 'id_match', 
                                      'channel_match', '_merge'], axis=1, inplace=True)

In [84]:
image_info.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1188 entries, 0 to 1187
Data columns (total 17 columns):
 #   Column                         Non-Null Count  Dtype   
---  ------                         --------------  -----   
 0   file                           1188 non-null   object  
 1   serie                          1188 non-null   object  
 2   channel                        1188 non-null   int64   
 3   nbr_frames                     1188 non-null   int64   
 4   shape_X                        1188 non-null   int64   
 5   shape_Y                        1188 non-null   int64   
 6   channel_count                  1188 non-null   int64   
 7   Image.@UniqueID                1188 non-null   object  
 8   Channel.@LUTName               1188 non-null   category
 9   Dimension.@DimID_X             1188 non-null   Int64   
 10  Dimension.@NumberOfElements_X  1188 non-null   Int64   
 11  Dimension.@Length_X            1188 non-null   float64 
 12  Dimension.@Unit_X              118

### Add additional information to the `image_info`dataframe necessary for the image analysis

`Zoom` = field of view  
`Pixel Size` and `Scan Speed` --> `PixelDwellTime` = the time the laser remains on a pixel  

In [85]:
# Get the information of interest
setting_meta = image_meta.iloc[:, list(range(0,3)) + list(range(10,11)) + list(range(94,95)) + list(range(98,99)) + list(range(119,120)) + list(range(164,172))]

In [86]:
setting_meta.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2695 entries, 0 to 2694
Data columns (total 15 columns):
 #   Column                                                           Non-Null Count  Dtype   
---  ------                                                           --------------  -----   
 0   FileName                                                         2695 non-null   object  
 1   Image.@Name                                                      2680 non-null   object  
 2   Image.@UniqueID                                                  2680 non-null   object  
 3   Channel.@LUTName                                                 2680 non-null   category
 4   ATLConfocalSettingDefinition.@ScanSpeed                          2680 non-null   float64 
 5   ATLConfocalSettingDefinition.@Zoom                               2680 non-null   float64 
 6   ATLConfocalSettingDefinition.@PixelDwellTime                     2680 non-null   float64 
 7   ATLConfocalSettingDefinition.Dete

In [87]:
# Remove the duplicated rows that are due to the differente dimensions (2 dimensions for each channel)
setting_meta.drop_duplicates(inplace=True, ignore_index=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [88]:
# Drop the row for which there is no image and no image ID
setting_meta.dropna(axis=0, how='all', subset=['Image.@Name', 'Image.@UniqueID', 'Channel.@LUTName'], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [89]:
# Select the columns containing relevant information
setting_meta.drop(['ATLConfocalSettingDefinition.DetectorList.Detector.@ScanType', 'ATLConfocalSettingDefinition.DetectorList.Detector.@Channel', 'ATLConfocalSettingDefinition.DetectorList.Detector.@ChannelName',
                   'ATLConfocalSettingDefinition.DetectorList.Detector.@IsActive'], axis=1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [90]:
setting_meta.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1188 entries, 0 to 1190
Data columns (total 11 columns):
 #   Column                                                      Non-Null Count  Dtype   
---  ------                                                      --------------  -----   
 0   FileName                                                    1188 non-null   object  
 1   Image.@Name                                                 1188 non-null   object  
 2   Image.@UniqueID                                             1188 non-null   object  
 3   Channel.@LUTName                                            1188 non-null   category
 4   ATLConfocalSettingDefinition.@ScanSpeed                     1188 non-null   float64 
 5   ATLConfocalSettingDefinition.@Zoom                          1188 non-null   float64 
 6   ATLConfocalSettingDefinition.@PixelDwellTime                1188 non-null   float64 
 7   ATLConfocalSettingDefinition.DetectorList.Detector.@Name    1179 non-null   ca

In [91]:
# Merge the setting dataframe with the image_info datfarame
image_info = image_info.merge(setting_meta, how='inner', left_on=['file', 'serie', 'Image.@UniqueID', 'Channel.@LUTName'], right_on=['FileName', 'Image.@Name', 'Image.@UniqueID', 'Channel.@LUTName'], indicator=True)

In [92]:
image_info['_merge'].unique()

[both]
Categories (1, object): [both]

In [93]:
image_info['file_match'] = np.where(image_info['file'] == image_info['FileName'], 'True', 'False')

In [94]:
image_info['file_match'].unique()

array(['True'], dtype=object)

In [95]:
image_info['serie_match'] = np.where(image_info['serie'] == image_info['Image.@Name'], 'True', 'False')

In [96]:
image_info['serie_match'].unique()

array(['True'], dtype=object)

In [97]:
image_info.drop(['file_match', 'serie_match', '_merge', 'FileName', 'Image.@Name'], axis=1, inplace=True)

In [98]:
image_info.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1188 entries, 0 to 1187
Data columns (total 24 columns):
 #   Column                                                      Non-Null Count  Dtype   
---  ------                                                      --------------  -----   
 0   file                                                        1188 non-null   object  
 1   serie                                                       1188 non-null   object  
 2   channel                                                     1188 non-null   int64   
 3   nbr_frames                                                  1188 non-null   int64   
 4   shape_X                                                     1188 non-null   int64   
 5   shape_Y                                                     1188 non-null   int64   
 6   channel_count                                               1188 non-null   int64   
 7   Image.@UniqueID                                             1188 non-null   ob

In [99]:
# Save current `image_info` dataframe to parquet:
image_info.to_parquet('image_info_dataframe.parquet')

In [100]:
image_arrays = sorted(glob.glob('../notebooks/image_processing/image_arrays/*.npy'))

In [101]:
image_arrays

[]

In [102]:
shape_dict = image_info.groupby(['shape_X', 'shape_Y']).indices

In [103]:
name_list = []
shape_list = []
arrays = dict()

for indices, array in zip(shape_dict, image_arrays):
    name_list.append(Path(array).stem)
    shape_list.append(indices)
    images = np.load(array)
    for idx, img in zip(shape_dict[indices], images):
        arrays.update({idx:img})

In [104]:
len(arrays)

0

In [105]:
name_list

[]

In [106]:
shape_list

[]

## Select for the samples with intracellular *Listeria*

### Select the rows of interest

In [107]:
df_probes = image_info[image_info['file'].str.contains('Probes')]
df_sample = image_info[image_info['file'].str.contains('Sample')]

In [108]:
# Combine all the images of interest in one dataframe
df_eukaryote = pd.concat([df_probes, df_sample], axis=0, join='outer', ignore_index=False)

In [109]:
df_eukaryote.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 484 entries, 168 to 455
Data columns (total 24 columns):
 #   Column                                                      Non-Null Count  Dtype   
---  ------                                                      --------------  -----   
 0   file                                                        484 non-null    object  
 1   serie                                                       484 non-null    object  
 2   channel                                                     484 non-null    int64   
 3   nbr_frames                                                  484 non-null    int64   
 4   shape_X                                                     484 non-null    int64   
 5   shape_Y                                                     484 non-null    int64   
 6   channel_count                                               484 non-null    int64   
 7   Image.@UniqueID                                             484 non-null    ob

### Save the dataframe to parquet

In [110]:
df_eukaryote.to_parquet('df_eukaryote.parquet')

### Check the dataframe

In [111]:
euk = pd.read_parquet('df_eukaryote.parquet')

In [112]:
euk.head(10)

Unnamed: 0,file,serie,channel,nbr_frames,shape_X,shape_Y,channel_count,Image.@UniqueID,Channel.@LUTName,Dimension.@DimID_X,...,Dimension.@NumberOfElements_Y,Dimension.@Length_Y,Dimension.@Unit_Y,ATLConfocalSettingDefinition.@ScanSpeed,ATLConfocalSettingDefinition.@Zoom,ATLConfocalSettingDefinition.@PixelDwellTime,ATLConfocalSettingDefinition.DetectorList.Detector.@Name,ATLConfocalSettingDefinition.DetectorList.Detector.@Type,ATLConfocalSettingDefinition.DetectorList.Detector.@Gain,ATLConfocalSettingDefinition.DetectorList.Detector.@Offset
168,200316_1_NoProbes.lif,Image001,0,1,512,512,4,07e1048b-6795-11ea-a94d-002432168676,Blue,1,...,512,0.000185,m,400.0,1.0,3e-06,PMT 3,PMT,600.289921,0.0
169,200316_1_NoProbes.lif,Image001,1,1,512,512,4,07e1048b-6795-11ea-a94d-002432168676,Green,1,...,512,0.000185,m,400.0,1.0,3e-06,HyD4 SMD,HyD,350.666885,-0.006667
170,200316_1_NoProbes.lif,Image001,2,1,512,512,4,07e1048b-6795-11ea-a94d-002432168676,Gray,1,...,512,0.000185,m,400.0,1.0,3e-06,PMT Trans,PMT,350.537881,0.0
171,200316_1_NoProbes.lif,Image001,3,1,512,512,4,07e1048b-6795-11ea-a94d-002432168676,Red,1,...,512,0.000185,m,400.0,1.0,3e-06,PMT 5,PMT,600.404364,0.0
172,200316_1_NoProbes.lif,Image002,0,1,512,512,4,30830004-6795-11ea-a94d-002432168676,Blue,1,...,512,0.000185,m,400.0,1.0,3e-06,PMT 3,PMT,600.289921,0.0
173,200316_1_NoProbes.lif,Image002,1,1,512,512,4,30830004-6795-11ea-a94d-002432168676,Green,1,...,512,0.000185,m,400.0,1.0,3e-06,HyD4 SMD,HyD,350.666885,-0.006667
174,200316_1_NoProbes.lif,Image002,2,1,512,512,4,30830004-6795-11ea-a94d-002432168676,Gray,1,...,512,0.000185,m,400.0,1.0,3e-06,PMT Trans,PMT,350.537881,0.0
175,200316_1_NoProbes.lif,Image002,3,1,512,512,4,30830004-6795-11ea-a94d-002432168676,Red,1,...,512,0.000185,m,400.0,1.0,3e-06,PMT 5,PMT,600.404364,0.0
256,200316_3_Triton_Probes.lif,Image001,0,1,632,632,4,fbbd1930-6793-11ea-a94d-002432168676,Blue,1,...,632,3.7e-05,m,400.0,5.000013,3e-06,PMT 3,PMT,600.289921,0.0
257,200316_3_Triton_Probes.lif,Image001,1,1,632,632,4,fbbd1930-6793-11ea-a94d-002432168676,Green,1,...,632,3.7e-05,m,400.0,5.000013,3e-06,HyD4 SMD,HyD,350.666885,-0.006667


In [113]:
euk.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 484 entries, 168 to 455
Data columns (total 24 columns):
 #   Column                                                      Non-Null Count  Dtype   
---  ------                                                      --------------  -----   
 0   file                                                        484 non-null    object  
 1   serie                                                       484 non-null    object  
 2   channel                                                     484 non-null    int64   
 3   nbr_frames                                                  484 non-null    int64   
 4   shape_X                                                     484 non-null    int64   
 5   shape_Y                                                     484 non-null    int64   
 6   channel_count                                               484 non-null    int64   
 7   Image.@UniqueID                                             484 non-null    ob

In [138]:
euk.loc[43]

file                                                                          200316_Sample_12.lif
serie                                                                                     Image006
channel                                                                                          3
nbr_frames                                                                                       1
shape_X                                                                                        632
shape_Y                                                                                        632
channel_count                                                                                    4
Image.@UniqueID                                               1b0253c9-6781-11ea-a94d-002432168676
Channel.@LUTName                                                                               Red
Dimension.@DimID_X                                                                               1
Dimension.

In [118]:
euk['file'].unique()

array(['200316_1_NoProbes.lif', '200316_3_Triton_Probes.lif',
       '200310_Probes.lif', '200316_Sample_10.lif',
       '200316_Sample_12.lif', '200316_Sample_5.lif',
       '200316_Sample_4.lif', '200316_Sample_6.lif',
       '200311_Sample_1.lif', '200311_Sample_2.lif',
       '200311_Sample_4.lif', '200311_Sample_9.lif',
       '200311_Sample_11.lif'], dtype=object)

In [140]:
euk[euk['file']=='200316_1_NoProbes.lif'].index

Int64Index([168, 169, 170, 171, 172, 173, 174, 175], dtype='int64')

In [137]:
euk[euk['file']=='200310_Probes.lif']

Unnamed: 0,file,serie,channel,nbr_frames,shape_X,shape_Y,channel_count,Image.@UniqueID,Channel.@LUTName,Dimension.@DimID_X,...,Dimension.@NumberOfElements_Y,Dimension.@Length_Y,Dimension.@Unit_Y,ATLConfocalSettingDefinition.@ScanSpeed,ATLConfocalSettingDefinition.@Zoom,ATLConfocalSettingDefinition.@PixelDwellTime,ATLConfocalSettingDefinition.DetectorList.Detector.@Name,ATLConfocalSettingDefinition.DetectorList.Detector.@Type,ATLConfocalSettingDefinition.DetectorList.Detector.@Gain,ATLConfocalSettingDefinition.DetectorList.Detector.@Offset
1052,200310_Probes.lif,Image001,0,1,512,512,4,f23725aa-62c4-11ea-a946-002432168676,Blue,1,...,512,3.7e-05,m,400.0,5.000013,3e-06,PMT 3,PMT,600.308995,-0.073333
1053,200310_Probes.lif,Image001,1,1,512,512,4,f23725aa-62c4-11ea-a946-002432168676,Green,1,...,512,3.7e-05,m,400.0,5.000013,3e-06,HyD4 SMD,HyD,349.980225,-0.006667
1054,200310_Probes.lif,Image001,2,1,512,512,4,f23725aa-62c4-11ea-a946-002432168676,Gray,1,...,512,3.7e-05,m,400.0,5.000013,3e-06,PMT Trans,PMT,400.034333,0.0
1055,200310_Probes.lif,Image001,3,1,512,512,4,f23725aa-62c4-11ea-a946-002432168676,Red,1,...,512,3.7e-05,m,400.0,5.000013,3e-06,PMT 5,PMT,600.862135,0.0
1056,200310_Probes.lif,Image002,0,1,512,512,4,fb3a707e-62c4-11ea-a946-002432168676,Blue,1,...,512,0.000185,m,400.0,1.0,3e-06,PMT 3,PMT,600.308995,-0.073333
1057,200310_Probes.lif,Image002,1,1,512,512,4,fb3a707e-62c4-11ea-a946-002432168676,Green,1,...,512,0.000185,m,400.0,1.0,3e-06,HyD4 SMD,HyD,349.980225,-0.006667
1058,200310_Probes.lif,Image002,2,1,512,512,4,fb3a707e-62c4-11ea-a946-002432168676,Gray,1,...,512,0.000185,m,400.0,1.0,3e-06,PMT Trans,PMT,400.034333,0.0
1059,200310_Probes.lif,Image002,3,1,512,512,4,fb3a707e-62c4-11ea-a946-002432168676,Red,1,...,512,0.000185,m,400.0,1.0,3e-06,PMT 5,PMT,600.862135,0.0
1060,200310_Probes.lif,Image003,0,1,512,512,4,19cd9973-62c5-11ea-a946-002432168676,Blue,1,...,512,0.000185,m,400.0,1.0,3e-06,PMT 3,PMT,600.308995,-0.073333
1061,200310_Probes.lif,Image003,1,1,512,512,4,19cd9973-62c5-11ea-a946-002432168676,Green,1,...,512,0.000185,m,400.0,1.0,3e-06,HyD4 SMD,HyD,349.980225,-0.006667
