# DS Video Count

In [1]:
import os
import re
import json
import pandas as pd
from pathlib import Path
from datetime import datetime

In [2]:
# Find all Jupyter Notebook files in the local GitHub repository, ignoring .ipynb_checkpoints folders.
def find_notebooks(root_path, extensions):
    """Recursively collect notebook files below ``root_path``.

    Parameters
    ----------
    root_path : str or pathlib.Path
        Directory to search (searched recursively).
    extensions : iterable of str
        File extensions to match, with or without a leading dot
        (``"ipynb"`` and ``".ipynb"`` are treated the same).

    Returns
    -------
    list of pathlib.Path
        Matching files, de-duplicated and sorted so repeated runs give the
        same order. Anything inside a ``.ipynb_checkpoints`` directory is
        skipped, as are directories whose names happen to match the pattern.
    """
    root = Path(root_path)
    found = set()
    for ext in extensions:
        # Normalise ".ipynb" -> "ipynb" so the glob never becomes "*..ipynb".
        suffix = str(ext).lstrip(".")
        for path in root.rglob(f"*.{suffix}"):
            # Compare whole path components rather than a substring of the path,
            # so only real checkpoint directories are excluded.
            if ".ipynb_checkpoints" in path.parts:
                continue
            # rglob also yields directories whose names match; they cannot be opened as notebooks.
            if not path.is_file():
                continue
            found.add(path)
    # rglob order depends on the filesystem; sort for reproducible output.
    return sorted(found)


In [3]:
# Count the Vimeo and YouTube videos embedded in a Jupyter Notebook file.
def count_videos(notebook_path):
    """Count ``VimeoVideo(...)`` and ``YouTubeVideo(...)`` calls in a notebook's code cells.

    Parameters
    ----------
    notebook_path : str or pathlib.Path
        Path to a notebook file stored as UTF-8 JSON.

    Returns
    -------
    tuple of (int, int)
        ``(vimeo_count, youtube_count)`` summed over all code cells.
        Markdown and other non-code cells are ignored.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file is not valid JSON.
    """
    # Vimeo IDs are numeric; accept either quote style around the ID.
    vimeo_pattern = r"VimeoVideo\(\s*['\"]\d+"
    # YouTube IDs mix letters, digits, "-" and "_", so matching digits only
    # would miss most embeds.
    youtube_pattern = r"YouTubeVideo\(\s*['\"][\w-]+"

    with open(notebook_path, "r", encoding="utf-8") as f:
        notebook_data = json.load(f)

    vimeo_count = 0
    youtube_count = 0

    # .get() keeps a notebook without a top-level "cells" key from raising KeyError.
    for cell in notebook_data.get("cells", []):
        if cell.get("cell_type") != "code":
            continue
        # "source" may be a single string or a list of line strings.
        source = cell.get("source", "")
        code = source if isinstance(source, str) else "".join(source)
        vimeo_count += len(re.findall(vimeo_pattern, code))
        youtube_count += len(re.findall(youtube_pattern, code))

    return vimeo_count, youtube_count


In [4]:
# Search settings: where to look and which file extensions count as notebooks.
root_dir = "."  # Root of the local GitHub repository checkout
notebook_extensions = ["ipynb"]

# Collect every matching notebook path under the root directory.
notebooks = find_notebooks(root_path=root_dir, extensions=notebook_extensions)


In [5]:
# Build a Pandas DataFrame of per-notebook video counts and write it to a CSV file.
csv_columns = ["notebook", "vimeo_count", "youtube_count"]
video_counts = []

for notebook in notebooks:
    vimeo_count, youtube_count = count_videos(notebook)
    video_counts.append({
        "notebook": str(notebook),
        "vimeo_count": vimeo_count,
        "youtube_count": youtube_count,
    })

# Passing the columns explicitly keeps the header row in the CSV even when
# no notebooks were found (an empty list would otherwise give a column-less frame).
video_counts_df = pd.DataFrame(video_counts, columns=csv_columns)
video_counts_df.to_csv("ds-video-catalog.csv", index=False)
