# Final Project: this script extracts the x,y coordinates and cell_id of every tracked object from multiple Posxx-xx.txt log files, and labels them on the corresponding Posxx{channel}.tif movies, resulting in Posxx{channel}.mp4 tracking videos

## 1) Link to input data:

https://github.com/zyang20/final-project
   (Download folder 'ROI' and 'Movie')

## 2) Import your data

In [1]:
from pathlib import Path
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import imageio #Read the multiframe.tiff movie
from IPython.display import HTML
%matplotlib inline

# List every movie available in the Movie folder (sorted so the listing is reproducible).
movie_files = sorted(Path('Movie').glob('Pos*.tif'))
print("\033[1m" + 'Input A) All the movie files named Pos*.tif from folder Movie: \n' + "\033[0m",
      movie_files)
print('\n')
print("\033[1m" + 'Each movie consists of multiple frames and plays like below (one dot represents one object): \n'
      + "\033[0m")

# Preview one example movie; the same name drives both the file path and the figure title.
example_movie = Path('Movie') / 'Pos001CFP.tif'
vol = imageio.volread(example_movie)  # image stack; the first axis is the frame index
nslices = vol.shape[0]

fig, ax = plt.subplots()

# One image artist per frame (frame 0 first); ArtistAnimation shows them in sequence.
ims = [[ax.imshow(vol[j], cmap='gray', animated=True)] for j in range(nslices)]

# Axis styling, title and layout do not depend on the frame, so they are set once
# instead of being recomputed inside the frame loop.
ax.axis('off')
ax.set_title(example_movie.name)
fig.tight_layout()

ani = animation.ArtistAnimation(fig, ims, interval=100, blit=True, repeat_delay=3000)
plt.close(fig)  # keep the static figure from being rendered below the video
HTML(ani.to_html5_video())

[1mInput A) All the movie files named Pos*.tif from folder Movie: 
[0m [WindowsPath('Movie/Pos001CFP.tif'), WindowsPath('Movie/Pos002CFP.tif')]


[1mEach movie consists of multiple frames and plays like below (one dot represents one object): 
[0m


In [2]:
# Collect the ROI log files first, then report them under a bold heading.
roi_log_files = list(Path('ROI').glob('Pos*-*.txt'))
print("\033[1m" + 'Input B) All the log files named Pos*-*.txt from folder ROI: \n' + "\033[0m",
      roi_log_files)
print('\n')
print("\033[1m" + 'Each log file contains detailed tracking information for a object listed below \n'
      + "\033[0m")

import pandas as pd
from pathlib import Path

def Import_ROI_data(ROI_dir_name):
    """Read every 'Pos*-*.txt' log file of a folder into one DataFrame.

    Parameters
    ----------
    ROI_dir_name : str or Path
        Folder that holds the whitespace-delimited log files.

    Returns
    -------
    pandas.DataFrame
        Rows of all matching files stacked in sorted-filename order with a
        fresh 0..n-1 index; an empty DataFrame when no file matches.
    """
    ROI_path = Path(ROI_dir_name)

    # Read each file once and concatenate at the end: DataFrame.append was
    # removed in pandas 2.0 and re-copied the whole table on every call.
    # Sorting makes the row order independent of the file system.
    tables = [pd.read_csv(file, delimiter=r'\s+')
              for file in sorted(ROI_path.glob('Pos*-*.txt'))]
    if not tables:
        # pd.concat raises on an empty list; keep the "no files -> empty frame" result.
        return pd.DataFrame()
    return pd.concat(tables, ignore_index=True)

# Preview the first rows of the combined log table.
roi_preview = Import_ROI_data('ROI').head()
print(roi_preview)

[1mInput B) All the log files named Pos*-*.txt from folder ROI: 
[0m [WindowsPath('ROI/Pos001-001.txt'), WindowsPath('ROI/Pos001-002.txt'), WindowsPath('ROI/Pos001-003.txt'), WindowsPath('ROI/Pos001-004.txt'), WindowsPath('ROI/Pos001-005.txt'), WindowsPath('ROI/Pos001-006.txt'), WindowsPath('ROI/Pos001-007.txt'), WindowsPath('ROI/Pos001-008.txt'), WindowsPath('ROI/Pos001-009.txt'), WindowsPath('ROI/Pos001-010.txt'), WindowsPath('ROI/Pos001-011.txt'), WindowsPath('ROI/Pos001-012.txt'), WindowsPath('ROI/Pos001-013.txt'), WindowsPath('ROI/Pos001-014.txt'), WindowsPath('ROI/Pos001-015.txt'), WindowsPath('ROI/Pos001-016.txt'), WindowsPath('ROI/Pos001-017.txt'), WindowsPath('ROI/Pos001-018.txt'), WindowsPath('ROI/Pos001-019.txt'), WindowsPath('ROI/Pos001-020.txt'), WindowsPath('ROI/Pos001-021.txt'), WindowsPath('ROI/Pos001-022.txt'), WindowsPath('ROI/Pos001-023.txt'), WindowsPath('ROI/Pos002-001.txt'), WindowsPath('ROI/Pos002-002.txt'), WindowsPath('ROI/Pos002-003.txt'), WindowsPath('ROI/P

## 3) Show me the head of your data

#### Extract the tracking information for all tracked objects from the log files and append it to one dataframe, including 'frame', the 'X' and 'Y' coordinates, 'Position' (the movie name) and 'cell_id', then delete duplicates

In [3]:
import pandas as pd
from pathlib import Path

def Import_ROI_data(ROI_dir_name):
    """Read every 'Pos*-*.txt' log file of a folder into one DataFrame.

    Each file's rows are tagged with the file name (without extension) in a
    leading 'name' column so the position and cell id can be parsed from it later.

    Parameters
    ----------
    ROI_dir_name : str or Path
        Folder that holds the whitespace-delimited log files.

    Returns
    -------
    pandas.DataFrame
        Rows of all matching files stacked in sorted-filename order with a
        fresh 0..n-1 index; an empty DataFrame when no file matches.
    """
    ROI_path = Path(ROI_dir_name)

    # Collect the per-file tables and concatenate once: DataFrame.append was
    # removed in pandas 2.0 and re-copied the whole table on every call.
    # Sorting makes the row order independent of the file system.
    tables = []
    for file in sorted(ROI_path.glob('Pos*-*.txt')):
        data = pd.read_csv(file, delimiter=r'\s+')
        # add a new column that stores filenames
        data.insert(0, 'name', file.stem)
        tables.append(data)

    if not tables:
        # pd.concat raises on an empty list; keep the "no files -> empty frame" result.
        return pd.DataFrame()
    return pd.concat(tables, ignore_index=True)

logs = Import_ROI_data('ROI')

# Pull the frame number out of the trailing ":<frame>" part of each Label,
# and the position / cell id out of the "Posxxx-yyy" file name.
frame_pattern = r'tif.[0-9]{1,4}-[0-9]{1,4}-[0-9]{1,4}:(?P<frame>\d{1,3})'
name_pattern = r'(?P<Pos>Pos[0-9]{1,3})-(?P<id>[0-9]{1,3})'
logs['frame'] = logs['Label'].str.extract(frame_pattern).astype(int)
logs[['Position','cell_id']] = logs['name'].str.extract(name_pattern)

# A cell may be logged more than once for the same frame: rows sharing both
# 'name' and 'frame' are duplicates, and only the last one is kept.
dedup_keys = ['name', 'frame']
duplicateRowsDF = logs[logs.duplicated(subset=dedup_keys, keep='last')]
logs_output = logs.drop_duplicates(subset=dedup_keys, keep='last', ignore_index=True)

# Report the rows that were dropped.
print("Duplicate Rows based on a single column are:", duplicateRowsDF, sep='\n')

Duplicate Rows based on a single column are:
            name                              Label  Area         X        Y  \
98    Pos001-002  Pos001CFP-2.tif:0029-0695-0590:29   360   590.408  696.250   
99    Pos001-002  Pos001CFP-2.tif:0030-0681-0635:30   176   635.574  681.489   
901   Pos001-015  Pos001CFP-1.tif:0026-0534-1018:26     1  1018.500  534.500   
1175  Pos001-019  Pos001CFP-2.tif:0073-0416-0522:73   610   522.495  416.569   
1892  Pos002-006  Pos002CFP-2.tif:0073-0335-0587:73   319   586.472  335.133   
2177  Pos002-010  Pos002CFP-2.tif:0073-0777-0113:73   326   113.218  777.270   

      Division  frame Position cell_id  
98           0     29   Pos001     002  
99           0     30   Pos001     002  
901          0     26   Pos001     015  
1175         0     73   Pos001     019  
1892         0     73   Pos002     006  
2177         0     73   Pos002     010  


In [4]:
# First five rows of the de-duplicated tracking table.
logs_output.head(n=5)

Unnamed: 0,name,Label,Area,X,Y,Division,frame,Position,cell_id
0,Pos001-001,Pos001CFP-2.tif:0001-0651-0302:1,473,302.196,651.117,0,1,Pos001,1
1,Pos001-001,Pos001CFP-2.tif:0002-0688-0324:2,436,324.42,688.135,0,2,Pos001,1
2,Pos001-001,Pos001CFP-2.tif:0003-0692-0335:3,472,334.661,691.919,1,3,Pos001,1
3,Pos001-001,Pos001CFP-2.tif:0004-0680-0328:4,317,328.197,679.162,0,4,Pos001,1
4,Pos001-001,Pos001CFP-2.tif:0005-0672-0317:5,190,317.247,672.684,0,5,Pos001,1


## 4) Show me the shape of your data

In [5]:
# Report the size of the de-duplicated tracking table.
shape = logs_output.shape
n_rows, n_cols = shape
print(f'logs_output data has {n_rows} rows and {n_cols} columns')

logs_output data has 2246 rows and 9 columns


## 5) Goal: Label all tracked_objects in the corresponding frames and make tracking video 

In [6]:
from IPython.display import Video
# Example of the target result. The file lives in 'Tracking_videos', the folder the
# final code writes its .mp4 files to, so it has to exist before this cell is run.
Video(
    "Tracking_videos/Pos001CFP.mp4",
    embed=True,
    width=500,
    height=500,
)

## 6) Final code

In [7]:
from pathlib import Path
import pandas as pd

import numpy as np
import imageio #Read the multiframe.tiff movie
import matplotlib.pyplot as plt
#%matplotlib inline
from matplotlib import cm #colormap

import matplotlib
matplotlib.use('Agg')
import matplotlib.animation as manimation
# For the 1st time, install FFMPEG by uncommeting the line below
# pip install imageio-ffmpeg
            
def make_trackingvideos(ROI_dir_name,movie_dir_name,track_dir_name,interval, channel):
    """Label every tracked cell on each movie and save one .mp4 per position.

    All 'Pos*-*.txt' logs in ``ROI_dir_name`` are read; the frame number is parsed from
    the 'Label' column and the position / cell id from the file name. For every position
    found, the movie ``{Position}{channel}.tif`` in ``movie_dir_name`` is rendered frame
    by frame with a time stamp and the id of each cell logged for that frame, and written
    to ``track_dir_name`` as ``{Position}{channel}.mp4`` (5 fps, 300 dpi).

    Input examples:
          ROI_dir_name = 'ROI'
          movie_dir_name = 'Movie'
          track_dir_name = 'Trackingvideos'
          interval = 20      (minutes between two frames; the time stamp is shown in hours)
          channel = 'CFP'
    Output examples:
          In folder 'Trackingvideos', PosxxxCFP.mp4 are generated with all tracked cells labeled in their
          corresponding coordinates

    Returns:
          None. The rows dropped as duplicates and a progress line per video are printed.

    Raises:
          FileNotFoundError: if ``ROI_dir_name`` contains no 'Pos*-*.txt' file.
    """

    # Find all {Posxx-xx.txt} files in the sub_folder {ROI_dir_name} and extract info from filename and contents
    ROI_path = Path(ROI_dir_name)
    tables = []
    for file in sorted(ROI_path.glob('Pos*-*.txt')):
        data = pd.read_csv(file, delimiter=r'\s+')
        # add a new column to store filenames
        data.insert(0,'name',file.stem)
        tables.append(data)
    if not tables:
        raise FileNotFoundError(f"No 'Pos*-*.txt' log files found in {ROI_path}")
    # Concatenate once (DataFrame.append was removed in pandas 2.0).
    logs = pd.concat(tables, ignore_index=True)

    # extract 'frame' 'Position' 'cell_id' from the files
    logs['frame'] = (logs['Label'].str.extract(r'tif.[0-9]{1,4}-[0-9]{1,4}-[0-9]{1,4}:(?P<frame>\d{1,3})')).astype(int)
    logs[['Position','cell_id']] = logs['name'].str.extract(r'(?P<Pos>Pos[0-9]{1,3})-(?P<id>[0-9]{1,3})')

    # Rows sharing 'name' and 'frame' are duplicates: report them and only keep the last one
    duplicateRowsDF = logs[logs.duplicated(subset=['name', 'frame'],keep='last')]
    logs_output = logs.drop_duplicates(subset=['name', 'frame'],keep='last',ignore_index=True)
    print("Duplicate Rows based on a single column are:", duplicateRowsDF, sep='\n')

    #####################################################################################################################

    # Label all movies with cell_id at corresponding coordinates and save to {track_dir_name}
    track_dir = Path(track_dir_name)
    if track_dir.exists():
        print(f"{track_dir} exists")
    else:
        print(f"Make {track_dir}")
        track_dir.mkdir(parents=True)

    movie_dir = Path(movie_dir_name)
    FFMpegWriter = manimation.writers['ffmpeg']
    style = dict(size=14,color='white')  # time stamp text style

    # for each PosxxCFP.tif movie (file names that did not parse have no Position and are skipped)
    for pos in logs_output['Position'].dropna().unique():
        movie_name = f'{pos}{channel}.tif'
        vol = imageio.volread(movie_dir / movie_name)  # image stack; first axis is the frame index
        nslices = vol.shape[0]

        # Only the rows of THIS position are relevant for this movie.
        pos_df = logs_output[logs_output['Position'] == pos]

        # Assign a unique label color for each cell. Colors are keyed by the cell number
        # itself, so ids need not be contiguous or start at 1.
        cell_numbers = sorted({int(cell_id) for cell_id in pos_df['cell_id']})
        palette = cm.rainbow(np.linspace(0,1,len(cell_numbers)))
        cellid_color = dict(zip(cell_numbers, palette))

        metadata = dict(title = movie_name, artist='Matplotlib',comment='ZY!')
        writer = FFMpegWriter(fps=5,metadata=metadata)

        video_name = movie_name.replace(".tif",".mp4")
        video_path = str(track_dir / video_name)

        fig = plt.figure()
        try:
            with writer.saving(fig, video_path, 300):
                # (fig, movie name, resolution in dpi)

                # for each frame of PosxxCFP.tif movie
                for j in range(nslices):
                    ax = fig.add_subplot(111)
                    ax.imshow(vol[j], cmap='gray')  # starts from slice 0
                    ax.axis('off')
                    fig.tight_layout()

                    # time stamp for this frame
                    time_label = f'{(j*interval/60):.1f}hr'
                    ax.text(250,50,time_label,ha='right',**style)

                    # Label all cells logged for this frame (log frames are 1-based).
                    # A frame without any logged cell is still written, just unlabeled,
                    # so the video keeps every frame of the movie.
                    this_frame_df = pos_df[pos_df['frame'] == j+1]
                    for x,y,cell_id in zip(this_frame_df['X'],this_frame_df['Y'],this_frame_df['cell_id']):
                        cell_number = int(cell_id)
                        ax.annotate(str(cell_number), # this is the text
                                    (x,y), # this is the point to label
                                    color = cellid_color[cell_number],
                                    fontsize = 8,
                                    textcoords="offset points", # how to position the text
                                    xytext=(1,1), # distance from text to points (x,y)
                                    ha='center') # horizontal alignment can be left, right or center

                    writer.grab_frame()
                    fig.clear()  # start the next frame from an empty figure
        finally:
            # Release the figure so it neither leaks nor shows up as an empty notebook output.
            plt.close(fig)
        print(f'Done with {video_name}')

## 7) Define the inputs and run the final code

In [8]:
# Settings for this run
ROI_dir_name = 'ROI'                  # folder with the Posxx-xx.txt tracking logs
movie_dir_name = 'Movie'              # folder with the Posxx{channel}.tif movies
track_dir_name = 'Tracking_videos'    # folder that receives the .mp4 tracking videos
channel = 'CFP'
interval = 20 # in the unit of min

make_trackingvideos(
    ROI_dir_name=ROI_dir_name,
    movie_dir_name=movie_dir_name,
    track_dir_name=track_dir_name,
    interval=interval,
    channel=channel,
)

Duplicate Rows based on a single column are:
            name                              Label  Area         X        Y  \
98    Pos001-002  Pos001CFP-2.tif:0029-0695-0590:29   360   590.408  696.250   
99    Pos001-002  Pos001CFP-2.tif:0030-0681-0635:30   176   635.574  681.489   
901   Pos001-015  Pos001CFP-1.tif:0026-0534-1018:26     1  1018.500  534.500   
1175  Pos001-019  Pos001CFP-2.tif:0073-0416-0522:73   610   522.495  416.569   
1892  Pos002-006  Pos002CFP-2.tif:0073-0335-0587:73   319   586.472  335.133   
2177  Pos002-010  Pos002CFP-2.tif:0073-0777-0113:73   326   113.218  777.270   

      Division  frame Position cell_id  
98           0     29   Pos001     002  
99           0     30   Pos001     002  
901          0     26   Pos001     015  
1175         0     73   Pos001     019  
1892         0     73   Pos002     006  
2177         0     73   Pos002     010  
Tracking_videos exists
Done with Pos001CFP.mp4
Done with Pos002CFP.mp4


<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>