# Make tracking videos for single cells

Author: Z. Yang

## 1) Link to input data:

https://github.com/zyang20/final-project
   (Download the 'Movie' and 'ROI' folders)

## 2) Import your data

Input A) Long-term single-cell movies named 'Pos<position><channel>.tif' (e.g. 'Pos001CFP.tif') from the 'Movie' folder

In [1]:
from pathlib import Path
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import imageio #Read the multiframe.tiff movie
from IPython.display import HTML
%matplotlib inline

# Load the multi-frame TIFF movie; point the path at another file
# (e.g. 'Movie/Pos100CFP.tif') to preview a different position.
vol = imageio.volread('Movie/Pos001CFP.tif')
# The stack is unpacked as (number of frames, size of axis 1, size of axis 2).
nslices, x_size, y_size = vol.shape

fig = plt.figure()
ims = []
# Draw each frame once (index 0 is the first frame) and keep its artist;
# ArtistAnimation expects one list of artists per animation frame.
for j, image in enumerate(vol):
    im = plt.imshow(image, cmap='gray', animated=True)
    plt.axis('off')
    plt.title('Pos001CFP.tif')
    plt.tight_layout()
    ims.append([im])

ani = animation.ArtistAnimation(
    fig,
    ims,
    interval=100,
    blit=True,
    repeat_delay=3000,
)
# Optional: ani.save('dynamic_images.mp4') writes the animation to disk.
plt.close()

In [2]:
#pip install termcolor 
from termcolor import colored
from pathlib import Path
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import imageio #Read the multiframe.tiff movie
from IPython.display import HTML
%matplotlib inline

def playmovie(Movie_dir_name):
    """
    Preview the first 'Pos*.tif' movie of a folder as an inline HTML5 video.

    Parameters:
        Movie_dir_name: the folder name where the 'Pos*.tif' movies are stored

    Returns:
        An IPython HTML object that embeds the animation of the first movie
        (first in alphabetical order of the file names).

    Raises:
        FileNotFoundError: if the folder contains no 'Pos*.tif' file
    """
    movie_dir = Path(Movie_dir_name)
    # glob() yields files in arbitrary order; sort so that "the first movie"
    # is the same file on every machine.
    movies_name = sorted(file.name for file in movie_dir.glob('Pos*.tif'))
    if not movies_name:
        raise FileNotFoundError(f"No 'Pos*.tif' movie found in '{movie_dir}'")

    print(movies_name, str(movies_name).replace("CFP",".YFP"), str(movies_name).replace("CFP",".RFP") )
    print("\033[1m" + colored('Pos001CFP.tif: Pos001 is the position, CFP is the channel name',color='blue')
          + "\033[0m")

    first_movie = movies_name[0]
    vol = imageio.volread(movie_dir / first_movie)

    # Axis, title and layout are identical for every frame, so set them once
    # instead of once per frame.
    fig, ax = plt.subplots()
    ax.set_axis_off()
    ax.set_title(first_movie)
    # One list of artists per animation frame, as ArtistAnimation expects.
    ims = [[ax.imshow(frame, cmap='gray', animated=True)] for frame in vol]
    fig.tight_layout()

    ani = animation.ArtistAnimation(fig, ims, interval=100, blit=True, repeat_delay=3000)
    # Close the figure so the notebook does not also show a static copy of it.
    plt.close(fig)
    return HTML(ani.to_html5_video())
    
# Preview a movie from the 'Movie' folder inline in the notebook.
playmovie('Movie')

['Pos001CFP.tif', 'Pos002CFP.tif'] ['Pos001.YFP.tif', 'Pos002.YFP.tif'] ['Pos001.RFP.tif', 'Pos002.RFP.tif']
[1m[34mPos001CFP.tif: Pos001 is the position, CFP is the channel name[0m[0m


Output goal: make a tracking video in which every frame carries a time stamp and every tracked cell is labeled with its ID

In [3]:
from IPython.display import Video

# Embed the tracking video 'Tracking_videos/Pos001CFP.mp4' in the notebook output.
Video("Tracking_videos/Pos001CFP.mp4",embed=True,width=300,height=300)

Input B) All log files named 'Pos<position>-<cell_id>.txt' (e.g. 'Pos001-001.txt') from the 'ROI' folder

In [4]:
import pandas as pd
from pathlib import Path

def Import_ROI_data(ROI_dir_name):
    """
    Combine all 'Posxx-xx.txt' files from folder {ROI_dir_name} into one dataframe
    Parameters:
        ROI_dir_name: the folder name where 'Posxx-xx.txt' files are stored
    Returns:
        logs: a dataframe with the rows of all 'Posxx-xx.txt' files stacked
              (files are read in alphabetical order); an empty dataframe if
              no file matches
    """
    ROI_path = Path(ROI_dir_name)

    # Sort the files so the row order does not depend on the filesystem.
    # The files are whitespace-delimited tables, hence the regex separator.
    tables = [pd.read_csv(file, sep=r'\s+')
              for file in sorted(ROI_path.glob('Pos*-*.txt'))]

    # pd.concat() refuses an empty list, so handle "no log file" explicitly.
    if not tables:
        return pd.DataFrame()

    # DataFrame.append() no longer exists in pandas 2.x; concatenating once
    # also avoids re-copying the growing dataframe for every file.
    logs = pd.concat(tables, ignore_index=True)
    return logs

In [5]:
# List every per-cell log file found in the 'ROI' folder.
roi_log_names = [log_file.name for log_file in Path('ROI').glob('Pos*-*.txt')]
print(roi_log_names)
# Explain the file naming scheme in bold blue text.
naming_hint = colored('Pos001-001.txt: Pos001 is the position, 001 is the cell_id', color='blue')
print("\033[1m" + naming_hint + "\033[0m")

['Pos001-001.txt', 'Pos001-002.txt', 'Pos001-003.txt', 'Pos001-004.txt', 'Pos001-005.txt', 'Pos001-006.txt', 'Pos001-007.txt', 'Pos001-008.txt', 'Pos001-009.txt', 'Pos001-010.txt', 'Pos001-011.txt', 'Pos001-012.txt', 'Pos001-013.txt', 'Pos001-014.txt', 'Pos001-015.txt', 'Pos001-016.txt', 'Pos001-017.txt', 'Pos001-018.txt', 'Pos001-019.txt', 'Pos001-020.txt', 'Pos001-021.txt', 'Pos001-022.txt', 'Pos001-023.txt', 'Pos002-001.txt', 'Pos002-002.txt', 'Pos002-003.txt', 'Pos002-004.txt', 'Pos002-005.txt', 'Pos002-006.txt', 'Pos002-007.txt', 'Pos002-008.txt', 'Pos002-009.txt', 'Pos002-010.txt', 'Pos002-011.txt']
[1m[34mPos001-001.txt: Pos001 is the position, 001 is the cell_id[0m[0m


'Posxx-xx.txt': tracking information for one cell as listed below

In [15]:
# Show the first rows of the table built from all log files in 'ROI'.
display(Import_ROI_data('ROI').head())

Unnamed: 0,name,Label,Area,X,Y,Division
0,Pos001-001,Pos001CFP-2.tif:0001-0651-0302:1,473,302.196,651.117,0
1,Pos001-001,Pos001CFP-2.tif:0002-0688-0324:2,436,324.42,688.135,0
2,Pos001-001,Pos001CFP-2.tif:0003-0692-0335:3,472,334.661,691.919,1
3,Pos001-001,Pos001CFP-2.tif:0004-0680-0328:4,317,328.197,679.162,0
4,Pos001-001,Pos001CFP-2.tif:0005-0672-0317:5,190,317.247,672.684,0


Combine all 'Posxx-xx.txt' files to one dataframe

In [7]:
import pandas as pd
from pathlib import Path

def Import_ROI_data(ROI_dir_name):
    """
    Combine all 'Posxx-xx.txt' files from folder {ROI_dir_name} to one dataframe
    Parameters:
        ROI_dir_name: the folder name where 'Posxx-xx.txt' files are stored
    Returns:
        logs: a dataframe with all 'Posxx-xx.txt' files appended (in
              alphabetical order of the file names); its first column 'name'
              holds the file name without extension. An empty dataframe is
              returned if no file matches.
    """
    ROI_path = Path(ROI_dir_name)

    tables = []
    # Sort the files so the row order does not depend on the filesystem.
    for file in sorted(ROI_path.glob('Pos*-*.txt')):
        # The files are whitespace-delimited tables, hence the regex separator.
        data = pd.read_csv(file, sep=r'\s+')
        #add a new column that stores filenames
        data.insert(0, 'name', file.stem)
        tables.append(data)

    # pd.concat() refuses an empty list, so handle "no log file" explicitly.
    if not tables:
        return pd.DataFrame()

    # DataFrame.append() no longer exists in pandas 2.x; concatenating once
    # also avoids re-copying the growing dataframe for every file.
    logs = pd.concat(tables, ignore_index=True)
    return logs

The head of the raw (not yet refined) dataframe

In [18]:
# Load every log file of the 'ROI' folder into one dataframe and preview its first three rows.
logs = Import_ROI_data('ROI')
logs.head(3)

Unnamed: 0,name,Label,Area,X,Y,Division,frame,Position,cell_id
0,Pos001-001,Pos001CFP-2.tif:0001-0651-0302:1,473,302.196,651.117,0,1,Pos001,1
1,Pos001-001,Pos001CFP-2.tif:0002-0688-0324:2,436,324.42,688.135,0,2,Pos001,1
2,Pos001-001,Pos001CFP-2.tif:0003-0692-0335:3,472,334.661,691.919,1,3,Pos001,1


Extract tracking information ('frame' 'Position' 'cell_id') from the coarse dataframe

In [9]:
# 'Label' ends with ':<frame>' (e.g. 'Pos001CFP-2.tif:0003-0692-0335:3' -> frame 3).
# \d+ instead of \d{1,3}: a bounded group would silently cut frame 1000 down to 100.
logs['frame'] = logs['Label'].str.extract(
    r'tif.[0-9]{1,4}-[0-9]{1,4}-[0-9]{1,4}:(?P<frame>\d+)', expand=False
).astype(int)
# 'name' is '<Position>-<cell_id>' (e.g. 'Pos001-002' -> 'Pos001', '002').
# Unbounded digit groups for the same reason: never truncate a long position or id.
logs[['Position','cell_id']] = logs['name'].str.extract(r'(?P<Pos>Pos\d+)-(?P<id>\d+)')

Remove duplicate rows (same file name and same frame) from the dataframe, keeping only the last occurrence of each

In [10]:
# Rows sharing the same ('name', 'frame') pair are duplicates; flag all but the last of each pair.
is_earlier_duplicate = logs.duplicated(subset=['name', 'frame'], keep='last')
duplicateRowsDF = logs[is_earlier_duplicate]
print("Below rows of duplicate frames except the last one are found:\n")
display(duplicateRowsDF)
# Keep only the last row of each pair and renumber the index from 0.
logs_output = logs.drop_duplicates(
    subset=['name', 'frame'],
    keep='last',
    ignore_index=True,
)

Below rows of duplicate frames except the last one are found:



Unnamed: 0,name,Label,Area,X,Y,Division,frame,Position,cell_id
98,Pos001-002,Pos001CFP-2.tif:0029-0695-0590:29,360,590.408,696.25,0,29,Pos001,2
99,Pos001-002,Pos001CFP-2.tif:0030-0681-0635:30,176,635.574,681.489,0,30,Pos001,2
901,Pos001-015,Pos001CFP-1.tif:0026-0534-1018:26,1,1018.5,534.5,0,26,Pos001,15
1175,Pos001-019,Pos001CFP-2.tif:0073-0416-0522:73,610,522.495,416.569,0,73,Pos001,19
1892,Pos002-006,Pos002CFP-2.tif:0073-0335-0587:73,319,586.472,335.133,0,73,Pos002,6
2177,Pos002-010,Pos002CFP-2.tif:0073-0777-0113:73,326,113.218,777.27,0,73,Pos002,10


## 3) The head of the refined dataframe

In [11]:
# Preview the first rows of the de-duplicated dataframe.
logs_output.head()

Unnamed: 0,name,Label,Area,X,Y,Division,frame,Position,cell_id
0,Pos001-001,Pos001CFP-2.tif:0001-0651-0302:1,473,302.196,651.117,0,1,Pos001,1
1,Pos001-001,Pos001CFP-2.tif:0002-0688-0324:2,436,324.42,688.135,0,2,Pos001,1
2,Pos001-001,Pos001CFP-2.tif:0003-0692-0335:3,472,334.661,691.919,1,3,Pos001,1
3,Pos001-001,Pos001CFP-2.tif:0004-0680-0328:4,317,328.197,679.162,0,4,Pos001,1
4,Pos001-001,Pos001CFP-2.tif:0005-0672-0317:5,190,317.247,672.684,0,5,Pos001,1


## 4) Show me the shape of your data

In [12]:
# DataFrame.shape is (number of rows, number of columns).
shape = logs_output.shape
n_rows, n_columns = shape
print(f'logs_output data has {n_rows} rows and {n_columns} columns')

logs_output data has 2246 rows and 9 columns


## 5) Final script 

In [13]:
## Install FFMPEG by uncommenting the line below if not installed
# pip install imageio-ffmpeg
from pathlib import Path
import pandas as pd
import numpy as np
import imageio #Read the multiframe.tiff movie
import matplotlib.pyplot as plt
from matplotlib import cm #colormap
import matplotlib
matplotlib.use('Agg')
import matplotlib.animation as manimation
            
def make_trackingvideos(ROI_dir_name,movie_dir_name,track_dir_name,interval, channel):
    """
    Extract tracking information for all tracked cells from the 'Posxxx-xxx.txt' files and label
    the cells in the corresponding 'Posxxx<channel>.tif' movies, writing one
    'Posxxx<channel>.mp4' tracking video per position.

    Parameters example:
      ROI_dir_name = 'ROI'  (folder where 'Posxxx-xxx.txt' files are stored)
      movie_dir_name = 'Movie' (folder where 'Posxxx<channel>.tif' movies are stored)
      track_dir_name = 'Trackingvideos' (folder where the '.mp4' tracking videos will be stored;
                                         it is created if it does not exist)
      interval = 20 (frame interval of the movie, in minutes)
      channel = 'CFP' (channel name of the movie)

    Returns:
      None. For every position found in the log files a video with a time stamp and the IDs of
      all tracked cells drawn next to their positions is written to {track_dir_name}.
      Every movie frame is written; frames without a tracked cell only carry the time stamp.

    Raises:
      FileNotFoundError: if {ROI_dir_name} contains no 'Pos*-*.txt' file
    """
    logs_output = _load_tracking_logs(ROI_dir_name)

    # Define the folder to store tracking_videos
    track_dir = Path(track_dir_name)
    if track_dir.exists():
        print(f"{track_dir} exists")
    else:
        print(f"Make {track_dir}")
        # parents=True so that a nested output path (e.g. 'results/videos') also works
        track_dir.mkdir(parents=True)

    movie_dir = Path(movie_dir_name)
    mydpi = 300

    # One movie (and therefore one video) per position found in the log files
    for pos in logs_output['Position'].unique():
        movie_name = f'{pos}{channel}.tif'
        vol = imageio.volread(movie_dir / movie_name)
        # Filter the rows of this position once instead of once per frame
        pos_df = logs_output[logs_output['Position'] == pos]
        _write_tracking_video(vol, pos_df, movie_name, track_dir, interval, mydpi)


def _load_tracking_logs(ROI_dir_name):
    """Read all 'Pos*-*.txt' logs, add 'frame', 'Position' and 'cell_id' and drop duplicated frames."""
    tables = []
    # Sort the files so the row order does not depend on the filesystem
    for file in sorted(Path(ROI_dir_name).glob('Pos*-*.txt')):
        data = pd.read_csv(file, sep=r'\s+')
        # add a new column to store filenames
        data.insert(0, 'name', file.stem)
        tables.append(data)
    if not tables:
        raise FileNotFoundError(f"No 'Pos*-*.txt' file found in '{ROI_dir_name}'")
    # DataFrame.append() no longer exists in pandas 2.x; concatenate once instead
    logs = pd.concat(tables, ignore_index=True)

    # 'Label' ends with ':<frame>', 'name' is '<Position>-<cell_id>'.
    # Unbounded \d+ groups: a bounded group would silently cut frame 1000 down to 100.
    logs['frame'] = logs['Label'].str.extract(
        r'tif.[0-9]{1,4}-[0-9]{1,4}-[0-9]{1,4}:(?P<frame>\d+)', expand=False
    ).astype(int)
    logs[['Position','cell_id']] = logs['name'].str.extract(r'(?P<Pos>Pos\d+)-(?P<id>\d+)')

    # Discard all duplicates except the last one
    duplicateRowsDF = logs[logs.duplicated(subset=['name', 'frame'], keep='last')]
    print("Duplicate measurements except the last one as shown below will be discarded:\n")
    display(duplicateRowsDF)  # display() is provided by IPython/Jupyter
    return logs.drop_duplicates(subset=['name', 'frame'], keep='last', ignore_index=True)


def _write_tracking_video(vol, pos_df, movie_name, track_dir, interval, mydpi):
    """Write one labeled '.mp4' for the movie stack {vol} using the tracking rows in {pos_df}."""
    # The stack is indexed (frame, row, column): rows run along y, columns along x
    nslices, height, width = vol.shape

    # set up for the video
    FFMpegWriter = manimation.writers['ffmpeg']
    metadata = dict(title=movie_name, artist='Matplotlib', comment='tracking!')
    writer = FFMpegWriter(fps=5, metadata=metadata)
    video_name = movie_name.replace(".tif", ".mp4")
    video_path = str(track_dir / video_name)

    # assign a unique label color for each cell; keyed by cell_id so that ids that do not
    # run contiguously from 1 (e.g. 1, 2, 5) cannot index past the end of the palette
    cell_ids = sorted(pos_df['cell_id'].unique(), key=int)
    palette = cm.rainbow(np.linspace(0, 1, len(cell_ids)))
    cellid_color = dict(zip(cell_ids, palette))

    style = dict(size=14, color='white')
    # figsize is (width, height) in inches
    fig = plt.figure(figsize=[width/mydpi, height/mydpi], dpi=mydpi, tight_layout=True)
    try:
        # writer.saving(fig, movie name, resolution)
        with writer.saving(fig, video_path, mydpi):
            # for each frame of the movie (j starts from 0, 'frame' in the logs from 1)
            for j in range(nslices):
                plt.imshow(vol[j], 'gray')
                plt.axis('off')

                # time stamp for this frame
                time_label = f'{(j*interval/60):.1f}hr'
                plt.text(250, 75, time_label, ha='right', **style)

                # label cell_ids of all tracked_cells for this frame (none if nothing was tracked)
                this_frame_df = pos_df[pos_df['frame'] == j + 1]
                for x, y, cell_id in zip(this_frame_df['X'], this_frame_df['Y'], this_frame_df['cell_id']):
                    plt.annotate(str(int(cell_id)),  # this is the text
                                 (x, y),  # this is the point to label
                                 color=cellid_color[cell_id],
                                 fontsize=8,
                                 textcoords="offset pixels",  # how to position the text
                                 xytext=(10, -20),  # distance from text to points (x,y)
                                 ha='center')  # horizontal alignment can be left, right or center

                # Always write and clear the frame: skipping this for frames without cells
                # would drop them from the video and leave their image and time stamp on
                # the figure, where they would be drawn underneath the next frame.
                writer.grab_frame()
                plt.clf()

        print(f'Done with {video_name}')
    finally:
        # Release the figure even if reading or encoding a frame fails
        plt.close(fig)

## 6) Define input & Run the final script

In [14]:
# Inputs for the run: where the log files and movies are read from,
# and where the tracking videos are written to.
ROI_dir_name = 'ROI'
movie_dir_name = 'Movie'
track_dir_name = 'Tracking_videos'
channel = 'CFP'
interval = 20  # in the unit of min
make_trackingvideos(
    ROI_dir_name=ROI_dir_name,
    movie_dir_name=movie_dir_name,
    track_dir_name=track_dir_name,
    interval=interval,
    channel=channel,
)

Duplicate measurements except the last one as shown below will be discarded:



Unnamed: 0,name,Label,Area,X,Y,Division,frame,Position,cell_id
98,Pos001-002,Pos001CFP-2.tif:0029-0695-0590:29,360,590.408,696.25,0,29,Pos001,2
99,Pos001-002,Pos001CFP-2.tif:0030-0681-0635:30,176,635.574,681.489,0,30,Pos001,2
901,Pos001-015,Pos001CFP-1.tif:0026-0534-1018:26,1,1018.5,534.5,0,26,Pos001,15
1175,Pos001-019,Pos001CFP-2.tif:0073-0416-0522:73,610,522.495,416.569,0,73,Pos001,19
1892,Pos002-006,Pos002CFP-2.tif:0073-0335-0587:73,319,586.472,335.133,0,73,Pos002,6
2177,Pos002-010,Pos002CFP-2.tif:0073-0777-0113:73,326,113.218,777.27,0,73,Pos002,10


Tracking_videos exists
Done with Pos001CFP.mp4
Done with Pos002CFP.mp4
