# Merge Annotation CSV files   
After you run the labeling code, you will have one annotation csv file for each video file.   
For simplicity, you would want to merge them into one, so that for each session, each camera would have just one annotation file. Then, we also want to give each frame a 'frame number' so that we can sync with actual timestamps of audio files. 

In [None]:
import pandas as pd
import glob

### STEP 1. Merging CSV files 
Create a list of all csv files that you want to merge. You have to modify the directory inside the glob function to the place where you kept the annotation files for one camera.   
Then, merge them into one dataframe. 

In [13]:
csv_files = glob.glob('./output_csv/Test_11282023/gopro08/*.{c}'.format(c = 'csv'))
csv_files.sort()
csv_files

['./output_csv/Test_11282023/gopro08/GH010005_0.7.csv',
 './output_csv/Test_11282023/gopro08/GH020005_0.7.csv',
 './output_csv/Test_11282023/gopro08/GH030005_0.7.csv',
 './output_csv/Test_11282023/gopro08/GH040005_0.7.csv',
 './output_csv/Test_11282023/gopro08/GH050005_0.7.csv',
 './output_csv/Test_11282023/gopro08/GH060005_0.7.csv',
 './output_csv/Test_11282023/gopro08/GH070005_0.7.csv',
 './output_csv/Test_11282023/gopro08/GH080005_0.7.csv',
 './output_csv/Test_11282023/gopro08/GH090005_0.7.csv',
 './output_csv/Test_11282023/gopro08/GH100005_0.7.csv']

In [14]:
df_csv_append = pd.DataFrame()
 
# append the CSV files
for file in csv_files:
    df = pd.read_csv(file)
    df_csv_append = df_csv_append.append(df, ignore_index=True)
    print(len(df))
 
df_csv_append

15900
15900
15900
15900
15900
15900
15900
15900
15900
5639


Unnamed: 0,frame,recorder1_1m,recorder2_1m,recorder1_3m,recorder2_3m,recorder1_6m,recorder2_6m,recorder1_9m,recorder2_9m,view_recorder1_1m,view_recorder2_1m,view_recorder1_3m,view_recorder2_3m,view_recorder1_6m,view_recorder2_6m,view_recorder1_9m,view_recorder2_9m
0,0,0,1,0,2,0,3,0,3,0,0,0,1,0,1,0,1
1,1,0,1,0,3,0,3,0,3,0,0,0,1,0,1,0,1
2,2,0,1,0,2,0,2,0,2,0,0,0,1,0,1,0,1
3,3,0,2,0,3,0,3,0,3,0,0,0,1,0,1,0,1
4,4,0,1,0,2,0,2,0,2,0,0,0,1,0,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
148734,5634,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1
148735,5635,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1
148736,5636,0,0,0,0,2,0,2,0,0,0,0,1,0,1,0,1
148737,5637,0,0,0,0,2,0,2,0,0,0,0,1,0,1,0,1


### STEP 2. Adding Timestamps and Editing Frame Numbers 
In https://somewes.com/frame-count/, upload the first video and adjust the 'video FPS' and 'console FPS' to 30 seconds. Then, play the video to find the frame where you can see the clock in the video.    
You need the frame number and the clock time recorded in the video to run the code below.

In [15]:
# Cutting video to start from the clock-showing frame
start_frame = 7
df_csv_append = df_csv_append.iloc[start_frame:,:]

# Starting Frame Number from 0 
df_csv_append['frame'] = range(0, len(df_csv_append))

# Check if 'error' column exists and drop it if it does
if 'error' in df_csv_append.columns:
    df_csv_append.drop(columns=['error'], inplace=True)

df_csv_append.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_csv_append['frame'] = range(0, len(df_csv_append))


Unnamed: 0,frame,recorder1_1m,recorder2_1m,recorder1_3m,recorder2_3m,recorder1_6m,recorder2_6m,recorder1_9m,recorder2_9m,view_recorder1_1m,view_recorder2_1m,view_recorder1_3m,view_recorder2_3m,view_recorder1_6m,view_recorder2_6m,view_recorder1_9m,view_recorder2_9m
7,0,0,1,0,2,0,2,0,3,0,0,0,1,0,1,0,1
8,1,0,1,0,2,0,2,0,2,0,0,0,1,0,1,1,1
9,2,0,1,0,2,0,2,0,3,0,0,0,1,0,1,0,1
10,3,0,1,0,2,0,2,0,3,0,0,0,1,0,1,0,1
11,4,0,1,0,2,0,2,0,2,0,0,0,1,0,1,0,1


In [16]:
# Add timestamp
from datetime import datetime, timedelta

# Start time for the timestamps (YYYY, MM, DD, HH, MM, SS)
start_time = datetime(2023, 11, 28, 14, 58, 46)

# Adding the timestamp column
df_csv_append['timestamp'] = [start_time + timedelta(seconds=i) for i in range(len(df_csv_append))]

# Checking for duplicate timestamps
duplicate_timestamps = df_csv_append.duplicated(subset='timestamp').any()

print("Are there any duplicate timestamps? ", duplicate_timestamps)

df_csv_append

Are there any duplicate timestamps?  False


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_csv_append['timestamp'] = [start_time + timedelta(seconds=i) for i in range(len(df_csv_append))]


Unnamed: 0,frame,recorder1_1m,recorder2_1m,recorder1_3m,recorder2_3m,recorder1_6m,recorder2_6m,recorder1_9m,recorder2_9m,view_recorder1_1m,view_recorder2_1m,view_recorder1_3m,view_recorder2_3m,view_recorder1_6m,view_recorder2_6m,view_recorder1_9m,view_recorder2_9m,timestamp
7,0,0,1,0,2,0,2,0,3,0,0,0,1,0,1,0,1,2023-11-28 14:58:46
8,1,0,1,0,2,0,2,0,2,0,0,0,1,0,1,1,1,2023-11-28 14:58:47
9,2,0,1,0,2,0,2,0,3,0,0,0,1,0,1,0,1,2023-11-28 14:58:48
10,3,0,1,0,2,0,2,0,3,0,0,0,1,0,1,0,1,2023-11-28 14:58:49
11,4,0,1,0,2,0,2,0,2,0,0,0,1,0,1,0,1,2023-11-28 14:58:50
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
148734,148727,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1,2023-11-30 08:17:33
148735,148728,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1,2023-11-30 08:17:34
148736,148729,0,0,0,0,2,0,2,0,0,0,0,1,0,1,0,1,2023-11-30 08:17:35
148737,148730,0,0,0,0,2,0,2,0,0,0,0,1,0,1,0,1,2023-11-30 08:17:36


In [17]:
# Reordering columns to put 'timestamp' as the first column
cols = ['timestamp'] + [col for col in df_csv_append.columns if col != 'timestamp']
df_csv_append = df_csv_append[cols]

df_csv_append

Unnamed: 0,timestamp,frame,recorder1_1m,recorder2_1m,recorder1_3m,recorder2_3m,recorder1_6m,recorder2_6m,recorder1_9m,recorder2_9m,view_recorder1_1m,view_recorder2_1m,view_recorder1_3m,view_recorder2_3m,view_recorder1_6m,view_recorder2_6m,view_recorder1_9m,view_recorder2_9m
7,2023-11-28 14:58:46,0,0,1,0,2,0,2,0,3,0,0,0,1,0,1,0,1
8,2023-11-28 14:58:47,1,0,1,0,2,0,2,0,2,0,0,0,1,0,1,1,1
9,2023-11-28 14:58:48,2,0,1,0,2,0,2,0,3,0,0,0,1,0,1,0,1
10,2023-11-28 14:58:49,3,0,1,0,2,0,2,0,3,0,0,0,1,0,1,0,1
11,2023-11-28 14:58:50,4,0,1,0,2,0,2,0,2,0,0,0,1,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
148734,2023-11-30 08:17:33,148727,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1
148735,2023-11-30 08:17:34,148728,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1
148736,2023-11-30 08:17:35,148729,0,0,0,0,2,0,2,0,0,0,0,1,0,1,0,1
148737,2023-11-30 08:17:36,148730,0,0,0,0,2,0,2,0,0,0,0,1,0,1,0,1


In [18]:
df_csv_append.to_csv('/media/backup_SSD/ASPEDv2/Test_11072023/Test_11282023/Labels/11-28-camF.csv', index= False)

## Read CSV as DF, and edit timestamp   
Only in case you want to modify the timestamp, run the code below.

In [62]:
import pandas as pd

file = "/media/backup_SSD/ASPEDv1.1/Labels/Session_6072023/GTgopro04/6-7-D-merged.csv"
df_csv_append = pd.DataFrame()
df_csv_append = pd.read_csv(file)

print(len(df_csv_append))
df_csv_append

155479


Unnamed: 0,timestamp,frame,recorder1_1m,recorder2_1m,recorder1_3m,recorder2_3m,recorder1_6m,recorder2_6m,recorder1_9m,recorder2_9m
0,06/07/2023 17:27:00,0,0,0,0,0,0,0,0,0
1,06/07/2023 17:27:01,1,0,0,0,0,0,0,0,0
2,06/07/2023 17:27:02,2,0,0,0,0,1,0,1,0
3,06/07/2023 17:27:03,3,0,0,0,0,2,0,2,0
4,06/07/2023 17:27:04,4,0,0,2,0,2,0,2,0
...,...,...,...,...,...,...,...,...,...,...
155474,06/09/2023 12:38:14,155474,0,0,0,0,0,0,0,0
155475,06/09/2023 12:38:15,155475,0,0,0,0,0,0,0,0
155476,06/09/2023 12:38:16,155476,0,0,0,0,0,0,0,0
155477,06/09/2023 12:38:17,155477,0,0,0,0,0,0,0,0


In [63]:
# Add timestamp

from datetime import datetime, timedelta

# Start time for the timestamps
start_time = datetime(2023, 6, 7, 17, 27, 0)

# Adding the timestamp column
df_csv_append['timestamp'] = [start_time + timedelta(seconds=i) for i in range(len(df_csv_append))]

# Checking for duplicate timestamps
duplicate_timestamps = df_csv_append.duplicated(subset='timestamp').any()

print("Are there any duplicate timestamps? ", duplicate_timestamps)

# Reordering columns to put 'timestamp' as the first column
cols = ['timestamp'] + [col for col in df_csv_append.columns if col != 'timestamp']
df_csv_append = df_csv_append[cols]

df_csv_append

Are there any duplicate timestamps?  False


Unnamed: 0,timestamp,frame,recorder1_1m,recorder2_1m,recorder1_3m,recorder2_3m,recorder1_6m,recorder2_6m,recorder1_9m,recorder2_9m
0,2023-06-07 17:27:00,0,0,0,0,0,0,0,0,0
1,2023-06-07 17:27:01,1,0,0,0,0,0,0,0,0
2,2023-06-07 17:27:02,2,0,0,0,0,1,0,1,0
3,2023-06-07 17:27:03,3,0,0,0,0,2,0,2,0
4,2023-06-07 17:27:04,4,0,0,2,0,2,0,2,0
...,...,...,...,...,...,...,...,...,...,...
155474,2023-06-09 12:38:14,155474,0,0,0,0,0,0,0,0
155475,2023-06-09 12:38:15,155475,0,0,0,0,0,0,0,0
155476,2023-06-09 12:38:16,155476,0,0,0,0,0,0,0,0
155477,2023-06-09 12:38:17,155477,0,0,0,0,0,0,0,0


In [64]:
df_csv_append.to_csv('/media/backup_SSD/ASPEDv1.1/Labels/Session_6072023/GTgopro04/6-7-D.csv', index= False)