In [11]:
import os
from glob import glob
from converter import Converter
import pandas as pd
import numpy as np

# Input folder with the raw recordings, and the sub-folder inside it
# where repaired copies are looked up / written.
raw_dir = '../raw_videos'
repaired_dir = os.path.join(raw_dir, 'repaired')

# Detect

In [46]:
input_files = sorted(glob(os.path.join(raw_dir,'*.avi')))

# Probe every raw video and record one row per file:
#   status   'valid'    -> the probe reports a bitrate
#            'repaired' -> no bitrate, but a '<name>-repaired' copy exists
#            'corrupt'  -> no bitrate and no repaired copy
#   duration / fps are taken from the repaired copy when there is one,
#   and set to NaN for corrupt files.
file_info_list = []
conv = Converter()
for f1 in input_files:
    fn,ext = os.path.splitext(os.path.basename(f1))
    probe = conv.probe(f1)
    file_info = { 'file':fn, 'status':'', 
                  'duration (seconds)':probe.video.duration, 
                  'fps':probe.video.video_fps }
    if probe.video.bitrate is None:
        f2 = os.path.join(repaired_dir,fn+'-repaired'+ext)
        if os.path.exists(f2):
            file_info['status'] = 'repaired'
            probe2 = conv.probe(f2)
            file_info['duration (seconds)'] = probe2.video.duration
            file_info['fps'] = probe2.video.video_fps
        else:
            file_info['status'] = 'corrupt'
            file_info['duration (seconds)'] = np.nan
            file_info['fps'] = np.nan
    else:
        file_info['status'] = 'valid'
    file_info_list.append(file_info)

# Convert to a dataframe.
df = pd.DataFrame(file_info_list)
df['duration (seconds)'] = np.around(df['duration (seconds)'],2)
df['fps'] = np.around(df['fps'],2)
# display(df.head())

# Save as an excel spreadsheet.
# ExcelWriter.save() was deprecated and then removed in pandas 2.0; the
# context manager writes and closes the file on exit (also on error).
# The engine is pinned because `column_dimensions` is an openpyxl worksheet
# attribute and would not exist if pandas picked a different xlsx engine.
with pd.ExcelWriter('repair-info.xlsx', engine='openpyxl') as writer:
    df.to_excel(writer,sheet_name='sheet1',index=False)
    sheet = writer.sheets['sheet1']
    # Widen columns A and B so their contents are readable.
    for col,width in zip('AB',[60,10]):
        sheet.column_dimensions[col].width = width

In [58]:
# Read the per-video table back from the spreadsheet saved above.
df = pd.read_excel(io='repair-info.xlsx')
# df.head()

In [50]:
''' Videos shorter than the 20-minute (1200 second) target, shortest first. '''

df.loc[df['duration (seconds)'].lt(1200)].sort_values(by='duration (seconds)')

Unnamed: 0,file,status,duration (seconds),fps
344,RC_Sat_21dpf_GroupB_n2_20201024_1502,repaired,0.2,30.02
341,RC_Sat_21dpf_GroupA_n1a_20201024_1422,repaired,3.6,30.02
347,RC_Sat_21dpf_GroupE_n5_20201024_1543,repaired,4.63,30.02
187,Pa_Mon_84dpf_GroupC_n2_20200921_1210,valid,760.08,30.03
188,Pa_Mon_84dpf_GroupC_n5_20200921_1210,valid,769.83,30.02
661,SF_fri_14dpf_groupB_n5_20200703_1410,valid,1101.33,30.03
660,SF_fri_14dpf_groupA_n5_20200703_1330,valid,1121.14,30.03
324,Pa_Sun_84dpf_GroupBa_n5_20200830_1410,valid,1125.26,30.0
323,Pa_Sun_84dpf_GroupBa_n2b_20200830_1410,valid,1133.68,30.02
322,Pa_Sun_84dpf_GroupBa_n2_20200830_1410,valid,1134.71,30.02


In [56]:
''' Videos whose frame rate is more than 0.1 fps away from 30, lowest first. '''

df.loc[(df['fps'] - 30).abs() > 0.1].sort_values(by='fps')

Unnamed: 0,file,status,duration (seconds),fps
1,Pa_Fri_14dpf_GroupA_n2b_20200612_1300_10FPS,valid,1635.38,10.01
9,Pa_Fri_14dpf_groupA_n5_20200612_1300_10FPS,valid,1586.21,10.01
350,RC_Sat_28dpf_GroupA_n1a_20201031_1703,valid,1813.45,12.16
353,RC_Sat_28dpf_GroupD_n2_20201031_1618,valid,1324.31,12.16
355,RC_Sat_28dpf_GroupG_n5_20201031_1535,valid,1437.48,12.16


In [55]:
''' Videos flagged as corrupt, including those that already have a repaired copy. '''

I = df['status'].isin(['corrupt', 'repaired'])
df.loc[I]

Unnamed: 0,file,status,duration (seconds),fps
63,Pa_Fri_7dpf_GroupB_n2_20200605_1230_CORRUPT,repaired,1236.56,30.02
66,Pa_Fri_7dpf_GroupC_n2_20200605_1300_CORRUPT,repaired,1346.53,30.02
67,Pa_Fri_7dpf_GroupC_n2b_20200605_1300_CORRUPT,repaired,1502.85,30.0
68,Pa_Fri_7dpf_GroupC_n5_20200605_1300_CORRUPT,repaired,1477.33,30.02
162,Pa_Mon_7dpf_GroupA_n5_20200706_1140,repaired,1466.44,30.02
225,Pa_Sun_21dpf_GroupC_n1d_20200621_1630,repaired,2868.21,30.02
226,Pa_Sun_21dpf_GroupC_n1e_20200621_1630,repaired,2869.54,30.02
230,Pa_Sun_21dpf_GroupC_n5_20200621_1630,repaired,2877.37,30.0
234,Pa_Sun_28dpf_GroupA_n1d_20200705_1650,repaired,2035.31,30.0
235,Pa_Sun_28dpf_GroupA_n1e_20200705_1650,repaired,2032.58,30.02


In [57]:
# ''' List unmarked corrupt videos (filename doesn't contain "CORRUPT") '''

# corrupt = df[(df['status']=='corrupt')|(df['status']=='repaired')]['file']
# # display(corrupt)
# unmarked = corrupt[corrupt.apply(lambda x: 'corrupt' not in x.lower())]
# print('\n'.join(unmarked))

# Repair

In [23]:
input_files = sorted(glob(os.path.join(raw_dir,'*.avi')))

conv = Converter()

# A video counts as corrupt when its probed bitrate is None (the same
# criterion the "Detect" section uses). Collecting these files up front
# gives the progress line a real total: the original printed len(corrupt),
# a name that no live cell defines, so it raised NameError on a fresh kernel.
corrupt_files = [f for f in input_files
                 if conv.probe(f).video.bitrate is None]

# Make sure the output folder exists before anything is written into it.
os.makedirs(repaired_dir, exist_ok=True)

for i,f1 in enumerate(corrupt_files, start=1):
    fn,ext = os.path.splitext(os.path.basename(f1))
    f2 = os.path.join(repaired_dir,fn+'-repaired'+ext)
    if os.path.exists(f2):
        # A repaired copy is already present; do not redo the work.
        continue
    # Write an 'avi' output with the video codec option set to 'copy'.
    convert = conv.convert(f1, f2, {
        'format':'avi', 
        'video': {'codec':'copy'},
        })
    # conv.convert is iterated for progress values, shown as a percentage
    # on a single, continuously overwritten line per file.
    for progress in convert:
        print(f'{i}/{len(corrupt_files)}   {fn}   {100*progress:.1f}%',end='\r')
    print()

15/54   RC_Sat_14dpf_GroupE_n5_20201017_1435   99.9%
16/54   RC_Sat_14dpf_GroupG_n5_20201017_1435   99.9%
17/54   RC_Sat_21dpf_GroupA_n1a_20201024_1422   99.8%
18/54   RC_Sat_21dpf_GroupB_n2_20201024_1502   114.1%
19/54   RC_Sat_21dpf_GroupE_n5_20201024_1543   100.7%
20/54   RC_Sat_28dpf_GroupA_n1b_20201031_1703   99.9%
21/54   RC_Sat_7dpf_GroupA_n1b_20201010_1425   99.0%
22/54   RC_Sat_7dpf_GroupB_n1b_20201010_1500   100.0%
23/54   RC_Sat_7dpf_GroupB_n5_20201010_1500   99.9%
45/54   SF_Sun_70dpf_GroupC_n2a_20201004_1414   99.0%
46/54   SF_Sun_70dpf_GroupC_n5_20201004_1414   99.4%
53/54   SF_Sun_84dpf_GroupC_n2a_20201018_1404   99.5%
54/54   SF_Sun_84dpf_GroupC_n2b_20201018_1404   100.0%


# [old] Repair with opencv

In [None]:
# f1 = input_files[0]

# fn,ext = os.path.splitext(os.path.basename(f1))
# f2 = os.path.join(repaired_dir,fn+'-repaired-cv2'+ext)
# print(f2)

# cap    = cv2.VideoCapture(input_file)
# fps    = int(cap.get(cv2.CAP_PROP_FPS))
# fourcc = int(cap.get(cv2.CAP_PROP_FOURCC))
# width  = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
# height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# out    = cv2.VideoWriter( filename = output_file, frameSize = (width,height), 
#                            fourcc = fourcc, fps = fps, isColor = True )
# try:
#     i = 0
#     while True:
#         i += 1
#         print(f'{i}',end='\r')
#         ret,frame = cap.read()
#         if not ret:
#             break
# #         if i%1000==0:
# #             print(output_file[:-4]+f'--{i}.jpg')
# #             cv2.imwrite(output_file[:-4]+f'--{i}.jpg',frame)
#         out.write(frame)
# except:
#     pass
# cap.release()
# out.release()