In [1]:
import os
from glob import glob
from converter import Converter
import pandas as pd

# Folder holding the original .avi recordings, relative to this notebook.
raw_dir = '../raw_videos'
# Repaired copies live in a sub-folder of the raw-video folder.
repaired_dir = os.path.join(
    raw_dir,
    'repaired',
)

# Detect

In [74]:
# Survey every raw .avi: record its duration and whether it is 'valid',
# 'corrupt', or 'repaired' (a '<name>-repaired' copy exists in repaired_dir).
input_files = sorted(glob(os.path.join(raw_dir,'*.avi')))

file_info_list = []
conv = Converter()
for f1 in input_files:
    fn,ext = os.path.splitext(os.path.basename(f1))
    probe = conv.probe(f1)
    # NOTE(review): probe() is assumed to return None, or an object whose
    # .video is None, when a file is too damaged to read - confirm against the
    # converter package. Such files are handled like a missing bitrate so one
    # unreadable file cannot abort the whole survey with an AttributeError.
    video = getattr(probe, 'video', None)
    file_info = {'file':fn,
                 'duration (seconds)': video.duration if video is not None else '?'}
    if video is None or video.bitrate is None:
        # A missing bitrate is the marker this notebook uses for a damaged file.
        f2 = os.path.join(repaired_dir,fn+'-repaired'+ext)
        if os.path.exists(f2):
            file_info['status'] = 'repaired'
            # Report the duration of the repaired copy, not of the damaged original.
            repaired_video = getattr(conv.probe(f2), 'video', None)
            file_info['duration (seconds)'] = (
                repaired_video.duration if repaired_video is not None else '?')
        else:
            file_info['status'] = 'corrupt'
            # Duration of an unrepaired file is not trusted; '?' is a placeholder.
            file_info['duration (seconds)'] = '?'
    else:
        file_info['status'] = 'valid'
    file_info_list.append(file_info)

# Convert to a dataframe (columns: file, duration (seconds), status).
df = pd.DataFrame(file_info_list)
# df

In [76]:
# Save as an excel spreadsheet.
# The context manager writes and closes the file on exit (even if an error
# occurs while writing); it replaces the deprecated writer.save().
# The engine is pinned to openpyxl because `column_dimensions` below is an
# openpyxl worksheet attribute.
with pd.ExcelWriter('repair-info.xlsx', engine='openpyxl') as writer:
    df.to_excel(writer,sheet_name='sheet1',index=False)
    sheet = writer.sheets['sheet1']
    # Widen the first two spreadsheet columns (A and B).
    for col,width in zip('AB',[60,10]):
        sheet.column_dimensions[col].width = width

# Load from the excel spreadsheet above.
df = pd.read_excel('repair-info.xlsx')
df

Unnamed: 0,file,duration (seconds),status
0,Pa_Fri_14dpf_GroupA_n2_20200612_1400,1155.421779,valid
1,Pa_Fri_14dpf_GroupA_n2b_20200612_1300_10FPS,1635.381900,valid
2,Pa_Fri_14dpf_GroupA_n2b_20200612_1400,1280.367196,valid
3,Pa_Fri_14dpf_GroupB_n2_20200612_1430,1310.820225,valid
4,Pa_Fri_14dpf_GroupB_n2b_20200612_1430,1486.426656,valid
...,...,...,...
562,pa_Mon_28dpf_GroupB_n2_20200713_1300,1243.286488,valid
563,pa_Mon_28dpf_GroupC_n1a_20200727_1430,1212.902296,valid
564,pa_Mon_28dpf_GroupC_n2_20200727_1330,1562.187240,valid
565,pa_Mon_28dpf_GroupD_n1_20200727_1210,1294.259968,valid


In [78]:
# List the 20 shortest videos.
#
# Unrepaired corrupt files carry the placeholder '?' in the duration column,
# which makes the column a mix of strings and floats; sorting it directly
# raises a TypeError. Sort on a numeric view instead: non-numeric entries
# become NaN and are placed last. The displayed rows are unchanged.
durations = pd.to_numeric(df['duration (seconds)'], errors='coerce')
df.loc[durations.sort_values().index[:20]]

Unnamed: 0,file,duration (seconds),status
549,SF_Sun_7dpf_GroupD_n2b_20200802_1720_CORRUPT,0.033333,repaired
546,SF_Sun_7dpf_GroupD_n1b_20200802_1755_CORRUPT,133.598664,repaired
262,Pa_Sun_42dpf_GroupBa_n2a_20200719_1710_SHORT,437.71947,valid
100,Pa_Mon_21dpf_GroupB_n5_20200713_1500_SHORT,453.790812,valid
103,Pa_Mon_21dpf_GroupC_n5_20200713_1115_SHORT,475.185994,valid
104,Pa_Mon_21dpf_GroupD_n5_20200713_1400_SHORT,486.765346,valid
96,Pa_Mon_21dpf_GroupA_n1b_20200713_1320_SHORT,500.307864,valid
253,Pa_Sun_42dpf_GroupAb_n1a_20200719_1600_SHORT,516.41248,valid
259,Pa_Sun_42dpf_GroupBa_n1a_20200719_1640_SHORT,517.144508,valid
102,Pa_Mon_21dpf_GroupC_n2b_20200713_1200_SHORT,534.114248,valid


In [73]:
# List unmarked corrupt videos (filename doesn't contain "CORRUPT").

# Files that were found damaged, whether or not a repaired copy exists yet.
damaged_rows = df['status'].isin(['corrupt', 'repaired'])
corrupt = df.loc[damaged_rows, 'file']
# display(corrupt)

# Keep only the names missing the (case-insensitive) "corrupt" tag.
missing_tag = ['corrupt' not in name.lower() for name in corrupt]
unmarked = corrupt[missing_tag]
print('\n'.join(unmarked))

Pa_Mon_7dpf_GroupA_n5_20200706_1140
Pa_Sun_21dpf_GroupC_n1d_20200621_1630
Pa_Sun_21dpf_GroupC_n1e_20200621_1630
Pa_Sun_21dpf_GroupC_n5_20200621_1630
Pa_Sun_28dpf_GroupA_n1d_20200705_1650
Pa_Sun_28dpf_GroupA_n1e_20200705_1650
Pa_Sun_28dpf_GroupA_n1f_20200705_1650
Pa_Sun_42dpf_GroupBb_n2a_20200719_1745
Pa_Sun_42dpf_GroupBb_n2b_20200719_1745
SF_Fri_21dpf_GroupB_n2a_20200703_1440
SF_Fri_21dpf_GroupB_n2b_20200703_1440
SF_Fri_28dpf_GroupC_n1a_20200717_1700
SF_Fri_28dpf_GroupC_n2a_20200717_1610
SF_Fri_28dpf_GroupC_n5_20200717_1610
SF_Sat_28dpf_GroupD_n1a_20200627_1800
SF_Sat_28dpf_GroupD_n1b_20200627_1800
SF_Sat_28dpf_GroupD_n1c_20200627_1800
SF_Sat_7dpf_GroupA_n2a_20200606_1200
SF_Sat_7dpf_GroupA_n2b_20200606_1200
SF_Sat_7dpf_GroupA_n5_20200606_1200
SF_Sun_28dpf_GroupC_n1_20200823_1725
SF_Sun_28dpf_GroupC_n2_20200823_1725
SF_Sun_28dpf_GroupC_n5_20200823_1725
SF_Sun_7dpf_GroupA_n5_20200802_1530
SF_Sun_7dpf_GroupC_n2a_20200802_1640
SF_Sun_7dpf_GroupC_n2b_20200802_1640
SF_Sun_7dpf_GroupC_n5_202

# Repair

In [34]:
# Repair every damaged raw video by re-muxing it (stream copy, no re-encode)
# into repaired_dir as '<name>-repaired.avi'. Already repaired files are skipped.
input_files = sorted(glob(os.path.join(raw_dir,'*.avi')))

conv = Converter()

# Collect the damaged files first (missing bitrate == damaged) so the progress
# counter has its own total and does not rely on a variable from another cell.
to_repair = [f1 for f1 in input_files if conv.probe(f1).video.bitrate is None]

# The converter needs the output folder to exist.
os.makedirs(repaired_dir, exist_ok=True)

for i,f1 in enumerate(to_repair, start=1):
    fn,ext = os.path.splitext(os.path.basename(f1))
    f2 = os.path.join(repaired_dir,fn+'-repaired'+ext)
    if not os.path.exists(f2):
        convert = conv.convert(f1, f2, {
            'format':'avi',
            'video': {'codec':'copy'},
            })
        # convert is iterated for progress values; they are shown as a
        # percentage, and '\r' rewrites the same output line on each update.
        for progress in convert:
            print(f'{i}/{len(to_repair)}   {fn}   {100*progress:.1f}%',end='\r')
        print()

1/43   Pa_Fri_7dpf_GroupB_n2_20200605_1230_CORRUPT   99.2%
2/43   Pa_Fri_7dpf_GroupC_n2_20200605_1300_CORRUPT   99.9%
3/43   Pa_Fri_7dpf_GroupC_n2b_20200605_1300_CORRUPT   98.9%
4/43   Pa_Fri_7dpf_GroupC_n5_20200605_1300_CORRUPT   98.7%
5/43   Pa_Mon_7dpf_GroupA_n5_20200706_1140   99.3%
6/43   Pa_Sun_21dpf_GroupC_n1d_20200621_1630   100.0%
7/43   Pa_Sun_21dpf_GroupC_n1e_20200621_1630   99.8%
8/43   Pa_Sun_21dpf_GroupC_n5_20200621_1630   99.8%
9/43   Pa_Sun_28dpf_GroupA_n1d_20200705_1650   99.3%
10/43   Pa_Sun_28dpf_GroupA_n1e_20200705_1650   99.9%
11/43   Pa_Sun_28dpf_GroupA_n1f_20200705_1650   99.8%
12/43   Pa_Sun_42dpf_GroupBb_n2a_20200719_1745   99.1%
13/43   Pa_Sun_42dpf_GroupBb_n2b_20200719_1745   99.0%
14/43   Pa_Sun_7dpf_GroupB_n5a_20200614_1455_CORRUPT   99.0%
15/43   SF_Fri_21dpf_GroupB_n2a_20200703_1440   99.9%
16/43   SF_Fri_21dpf_GroupB_n2b_20200703_1440   98.8%
17/43   SF_Fri_28dpf_GroupC_n1a_20200717_1700   3918999.3%
18/43   SF_Fri_28dpf_GroupC_n2a_20200717_1610   99.9%


# [old] Repair with opencv

In [None]:
# f1 = input_files[0]

# fn,ext = os.path.splitext(os.path.basename(f1))
# f2 = os.path.join(repaired_dir,fn+'-repaired-cv2'+ext)
# print(f2)

# cap    = cv2.VideoCapture(input_file)
# fps    = int(cap.get(cv2.CAP_PROP_FPS))
# fourcc = int(cap.get(cv2.CAP_PROP_FOURCC))
# width  = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
# height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# out    = cv2.VideoWriter( filename = output_file, frameSize = (width,height), 
#                            fourcc = fourcc, fps = fps, isColor = True )
# try:
#     i = 0
#     while True:
#         i += 1
#         print(f'{i}',end='\r')
#         ret,frame = cap.read()
#         if not ret:
#             break
# #         if i%1000==0:
# #             print(output_file[:-4]+f'--{i}.jpg')
# #             cv2.imwrite(output_file[:-4]+f'--{i}.jpg',frame)
#         out.write(frame)
# except:
#     pass
# cap.release()
# out.release()