**Description:** Repair a specific type of video file corruption encountered in the larval schooling project. The process does not salvage corrupted frames; however, it produces videos that are readable by Ethovision and most video players. 

In [None]:
import os
from glob import glob
from converter import Converter
import pandas as pd
import numpy as np

# Folder that holds the raw .avi recordings to inspect.
raw_dir = '../../raw_videos/short'

# Repaired copies are kept in a 'repaired' subfolder next to the originals.
repaired_dir = os.path.join(raw_dir, 'repaired')

# Detect

In [None]:
# Probe every raw .avi file, classify it as valid / corrupt / repaired, and
# write the resulting table to 'repair-info_.xlsx'.
input_files = sorted(glob(os.path.join(raw_dir,'*.avi')))

file_info_list = []
conv = Converter()
for f1 in input_files:
    fn,ext = os.path.splitext(os.path.basename(f1))
    probe = conv.probe(f1)
    # Start from the raw file's own metadata; the branches below overwrite it
    # when the file is flagged as corrupt.
    file_info = { 'file':fn, 'status':'valid',
                  'duration (seconds)':probe.video.duration,
                  'fps':probe.video.video_fps,
                  'size (bytes)':os.stat(f1).st_size }
    # A missing video bitrate is the marker used here for the corruption.
    if probe.video.bitrate is None:
        f2 = os.path.join(repaired_dir,fn+'-repaired'+ext)
        if os.path.exists(f2):
            # Report the metadata of the repaired copy instead of the raw file.
            probe2 = conv.probe(f2)
            file_info['status'] = 'repaired'
            file_info['duration (seconds)'] = probe2.video.duration
            file_info['fps'] = probe2.video.video_fps
            file_info['size (bytes)'] = os.stat(f2).st_size
        else:
            # Not repaired yet: duration and fps are reported as NaN.
            file_info['status'] = 'corrupt'
            file_info['duration (seconds)'] = np.nan
            file_info['fps'] = np.nan
    file_info_list.append(file_info)

# Convert to a dataframe.
df = pd.DataFrame(file_info_list)
df['duration (seconds)'] = np.around(df['duration (seconds)'],2)
df['fps'] = np.around(df['fps'],2)
# bytes / 1024 / (duration * fps); NaN for corrupt files without a repaired copy.
df['size per frame (kB)'] = np.around(df['size (bytes)']/(1024*df['duration (seconds)']*df['fps']),1)

# display(df.head())

# Save as an excel spreadsheet.
# The context manager writes and closes the workbook on exit; ExcelWriter.save()
# no longer exists in pandas >= 2.0. The engine is pinned to openpyxl because
# `column_dimensions` below is an openpyxl worksheet attribute.
with pd.ExcelWriter('repair-info_.xlsx', engine='openpyxl') as writer:
    df.to_excel(writer,sheet_name='sheet1',index=False)
    sheet = writer.sheets['sheet1']
    # One width per dataframe column (A-F), in column order.
    for col,width in zip('ABCDEF',[60,8,20,5,13,19]):
        sheet.column_dimensions[col].width = width

In [None]:
# Reload the report written by the detection cell, so the listings below can
# be run without probing every video again.
df = pd.read_excel(io='repair-info_.xlsx')
# df.head()

In [None]:
''' List shortest videos. Target duration = 20 minutes = 1200 seconds. '''

# Keep rows strictly shorter than the target, shortest first.
df.loc[df['duration (seconds)'].lt(1200)].sort_values(by='duration (seconds)')

In [None]:
''' List videos with unusual fps. '''

# Keep rows whose frame rate is more than 0.1 away from 30, lowest fps first.
df.loc[(df['fps'] - 30).abs().gt(0.1)].sort_values(by='fps')

In [None]:
''' List corrupt videos. '''

# Boolean mask: True for files flagged as corrupt, whether or not a repaired
# copy exists yet.
I = df['status'].isin(['corrupt', 'repaired'])
df.loc[I]

In [None]:
# ''' List unmarked corrupt videos (filename doesn't contain "CORRUPT") '''

# corrupt = df[(df['status']=='corrupt')|(df['status']=='repaired')]['file']
# # display(corrupt)
# unmarked = corrupt[corrupt.apply(lambda x: 'corrupt' not in x.lower())]
# print('\n'.join(unmarked))

# Repair

In [None]:
# Write a '-repaired' copy of every corrupt raw video that does not have one yet.
input_files = sorted(glob(os.path.join(raw_dir,'*.avi')))

# Make sure the output folder exists before the converter writes into it.
os.makedirs(repaired_dir, exist_ok=True)

conv = Converter()
i = 0   # running count of corrupt files seen so far (repaired or not)
for f1 in input_files:
    # A missing video bitrate is the marker used here for the corruption.
    if conv.probe(f1).video.bitrate is not None:
        continue
    i += 1
    fn,ext = os.path.splitext(os.path.basename(f1))
    f2 = os.path.join(repaired_dir,fn+'-repaired'+ext)
    # Skip files that already have a repaired copy.
    # NOTE(review): an interrupted conversion may leave a partial file at f2,
    # which this check would then treat as repaired -- confirm and delete such
    # files by hand before re-running.
    if os.path.exists(f2):
        continue
    # Ask for an AVI container with the 'copy' video codec.
    convert = conv.convert(f1, f2, {
        'format':'avi',
        'video': {'codec':'copy'},
        })
    # Consuming the generator drives the conversion; each value is shown as
    # progress on a single, continuously overwritten line.
    for progress in convert:
        print(f'{i}/?   {fn}   {100*progress:.1f}%',end='\r')
    print()

# [old] Repair with opencv

In [None]:
# f1 = input_files[0]

# fn,ext = os.path.splitext(os.path.basename(f1))
# f2 = os.path.join(repaired_dir,fn+'-repaired-cv2'+ext)
# print(f2)

# cap    = cv2.VideoCapture(input_file)
# fps    = int(cap.get(cv2.CAP_PROP_FPS))
# fourcc = int(cap.get(cv2.CAP_PROP_FOURCC))
# width  = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
# height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# out    = cv2.VideoWriter( filename = output_file, frameSize = (width,height), 
#                            fourcc = fourcc, fps = fps, isColor = True )
# try:
#     i = 0
#     while True:
#         i += 1
#         print(f'{i}',end='\r')
#         ret,frame = cap.read()
#         if not ret:
#             break
# #         if i%1000==0:
# #             print(output_file[:-4]+f'--{i}.jpg')
# #             cv2.imwrite(output_file[:-4]+f'--{i}.jpg',frame)
#         out.write(frame)
# except:
#     pass
# cap.release()
# out.release()