# DS Notebook Catalog

In [1]:
import os
import pandas as pd
import glob
from pathlib import Path
import csv
from datetime import datetime
import mimetypes

In [13]:
def find_notebooks(root_path, extensions):
    """Recursively collect notebook files located under ``root_path``.

    Args:
        root_path: Directory to search (``str`` or path-like).
        extensions: One extension or an iterable of extensions, written with
            or without a leading dot (``"ipynb"`` and ``".ipynb"`` are
            treated the same).

    Returns:
        list[Path]: Matching files, grouped by extension in the order the
        extensions were given. Anything inside an ``.ipynb_checkpoints``
        directory is skipped, as are directories whose name happens to match.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(extensions, str):
        extensions = [extensions]

    root = Path(root_path)
    notebooks = []
    for ext in extensions:
        # Tolerate ".ipynb" as well as "ipynb" so the glob never becomes "*..ipynb".
        pattern = f"*.{ext.lstrip('.')}"
        notebooks.extend(
            path
            for path in root.rglob(pattern)
            # Compare whole path components rather than a substring of the
            # full path, and keep regular files only (later steps stat them).
            if ".ipynb_checkpoints" not in path.parts and path.is_file()
        )
    return notebooks

In [14]:
def get_notebook_metadata(notebook_path):
    """Describe a single notebook file as a flat dictionary.

    Args:
        notebook_path: Location of the file (``pathlib.Path`` or ``str``).

    Returns:
        dict: ``filename`` (base name), ``path`` (the path as a string),
        ``size`` (bytes) and ``modified_date`` (last-modification time,
        formatted as ``YYYY-MM-DD HH:MM:SS`` in local time).

    Raises:
        OSError: If the file cannot be stat'ed (e.g. it no longer exists).
    """
    notebook_path = Path(notebook_path)
    # Stat once so size and mtime describe the same snapshot of the file.
    file_stat = notebook_path.stat()
    modified = datetime.fromtimestamp(file_stat.st_mtime)
    return {
        "filename": notebook_path.name,
        "path": str(notebook_path),
        "size": file_stat.st_size,
        "modified_date": modified.strftime('%Y-%m-%d %H:%M:%S'),
    }



In [15]:
# Search settings: where to look and which file suffixes count as notebooks.
root_dir = "."  # The root directory of your local GitHub repository
notebook_extensions = ["ipynb"]
notebooks = find_notebooks(root_path=root_dir, extensions=notebook_extensions)

In [16]:
# One metadata record per discovered notebook.
notebook_metadata = [get_notebook_metadata(nb) for nb in notebooks]
# Naming the columns explicitly keeps the schema (and the CSV header) intact
# even when no notebooks were found; without it the frame has no columns and
# a later lookup such as metadata_df["size"] raises KeyError.
metadata_df = pd.DataFrame(
    notebook_metadata,
    columns=["filename", "path", "size", "modified_date"],
)
metadata_df.to_csv("ds-jupyter-notebooks.csv", index=False)

In [19]:
# Preview the first five catalog rows.
metadata_df.head(5)

Unnamed: 0,filename,path,size,modified_date
0,DS-Notebook-Catalog.ipynb,DS-Notebook-Catalog.ipynb,2898,2023-04-06 12:27:57
1,DS-Data-Catalog.ipynb,DS-Data-Catalog.ipynb,4784,2023-04-06 10:17:08
2,DS-Video-Catalog.ipynb,DS-Video-Catalog.ipynb,3854,2023-04-06 12:50:18
3,DS108Questions.ipynb,DS108-Databases/DS108Questions.ipynb,3810,2021-12-27 09:18:13
4,DS108NoSQL-L3-Updating-Documents.ipynb,DS108-Databases/NoSQL/DS108NoSQL-L3-Updating-Documents.ipynb,40601,2023-01-13 15:40:22


In [18]:
# Add up the per-file byte counts and report the total in megabytes.
total_size_bytes = metadata_df["size"].sum()
# The value is in megabytes (1 MB = 1024 ** 2 bytes here), so the variable is
# named accordingly rather than "_gb".
total_size_mb = total_size_bytes / (1024 ** 2)
print(f"Total size of all Jupyter Notebooks: {total_size_mb:.2f} MB")

Total size of all Jupyter Notebooks: 162.37 MB


In [20]:
# Count the paths returned by the notebook search and report the total.
total_notebooks = len(notebooks)
print(f"Total number of Jupyter Notebooks: {total_notebooks}")

Total number of Jupyter Notebooks: 742
