# Data Extraction from YouTube

In this notebook we download a monocular endoscopic surgery video.
The video source is: https://www.youtube.com/watch?v=6niL7Poc_qQ.
We split the video into individual frames, save them to `data/surgical_video/`, and create a PyTorch dataloader to load the frames.

In [None]:
from pytube import YouTube
import os
import torch
import pandas as pd
from skimage import io, transform
import imageio
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

In [None]:
# Source video, and the directory (relative to the working directory) that
# receives the extracted frames.
video_url = 'https://www.youtube.com/watch?v=6niL7Poc_qQ'
output_dir = 'surgical_video'
# exist_ok=True keeps this cell safe to re-run and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(output_dir, exist_ok=True)

In [None]:
# Look up the video, take the first stream pytube lists for it, and download it.
# `filename` is handed to imageio below, so it is presumably the local path of
# the downloaded file -- confirm against the pytube documentation.
youtube_video = YouTube(video_url)
first_stream = youtube_video.streams.first()
filename = first_stream.download()

In [None]:
# Open the downloaded video for frame-by-frame access via imageio's ffmpeg plugin.
vid = imageio.get_reader(filename, format='ffmpeg')

In [None]:
# Export source frames 340..1915 as PNGs. Output files are renumbered from
# zero, so source frame 340 is written as frame_0000.png.
first_frame, end_frame = 340, 1916
for out_index, frame in enumerate(range(first_frame, end_frame)):
    image = vid.get_data(frame)
    imageio.imwrite("{0}/frame_{1:04d}.png".format(output_dir, out_index), image)

In [None]:
class SinusVideoDataset(Dataset):
    """Dataset of endoscopy video frames stored as individual image files.

    Item ``idx`` is read from ``<root_dir>/frame_{idx:04d}.png``, so the
    directory is expected to hold consecutively numbered frames starting at
    ``frame_0000.png``.
    """

    def __init__(self, root_dir, transform=None):
        """
        Args:
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample. It receives the sample dict (``{'image': ...}``)
                and its return value is what ``__getitem__`` returns.
        """
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Return the number of regular files directly inside ``root_dir``.

        Paths are built with ``os.path.join`` so the count is correct whether
        or not ``root_dir`` ends with a path separator; plain string
        concatenation would yield paths such as
        ``surgical_videoframe_0000.png`` and count nothing.

        Note: every regular file is counted, not only ``frame_*.png`` files,
        and the directory is re-listed on each call.
        """
        return sum(
            1
            for name in os.listdir(self.root_dir)
            if os.path.isfile(os.path.join(self.root_dir, name))
        )

    def __getitem__(self, idx):
        """Load frame ``idx`` and return it as a sample.

        Args:
            idx (int): Zero-based frame index, formatted into the file name
                as a zero-padded four-digit number.

        Returns:
            dict: ``{'image': <result of skimage.io.imread>}``, or whatever
            ``transform`` returns for that dict when a transform is set.
        """
        img_name = os.path.join(self.root_dir, "frame_{0:04d}.png".format(idx))
        image = io.imread(img_name)
        sample = {'image': image}

        if self.transform:
            sample = self.transform(sample)

        return sample