# Jupyter Notebooks Contained in DS-Student-Resources

In [1]:
# Import the necessary libraries 
import os
import pandas as pd
import glob
from pathlib import Path
import csv
from datetime import datetime
import mimetypes

In [2]:
# Function to recursively find all Jupyter notebooks in the local GitHub repository, ignoring anything under .ipynb_checkpoints
def find_notebooks(root_path, extensions):
    """Recursively collect notebook files located under ``root_path``.

    Args:
        root_path: Directory to search (``str`` or path-like).
        extensions: Iterable of file extensions, with or without a leading
            dot (``"ipynb"`` and ``".ipynb"`` are equivalent). A single
            string is treated as one extension.

    Returns:
        list[pathlib.Path]: Matching regular files in discovery order, each
        listed once, excluding anything stored inside a
        ``.ipynb_checkpoints`` directory below ``root_path``.
    """
    if isinstance(extensions, str):
        # Iterating a bare string would search for one-letter extensions.
        extensions = [extensions]

    root = Path(root_path)
    notebooks = []
    seen = set()
    for ext in extensions:
        suffix = ext.lstrip(".")  # tolerate ".ipynb" as well as "ipynb"
        for path in root.rglob(f"*.{suffix}"):
            # Inspect only the components *below* the root, so a checkout that
            # happens to live under a ".ipynb_checkpoints" folder is not
            # filtered out wholesale.
            if ".ipynb_checkpoints" in path.relative_to(root).parts:
                continue
            # rglob also yields directories (and dangling symlinks) whose name
            # matches the pattern; only real files are notebooks.
            if not path.is_file():
                continue
            # Overlapping extensions must not list the same file twice.
            if path in seen:
                continue
            seen.add(path)
            notebooks.append(path)
    return notebooks

In [3]:
# Define the Jupyter notebook extension you want to search for and call the find_notebooks function.
# "." is resolved against the current working directory, so run this from the
# root of the local repository clone.
root_dir = "."
# Extensions are given without the leading dot.
notebook_extensions = ["ipynb"]
notebooks = find_notebooks(root_path=root_dir, extensions=notebook_extensions)

In [4]:
# Create a function to extract the necessary metadata from each Jupyter notebook.
def get_notebook_metadata(notebook_path):
    """Describe a single notebook file as a flat record.

    Args:
        notebook_path: ``pathlib.Path`` (or any object exposing ``name``,
            ``__str__`` and ``stat()``) pointing at the notebook.

    Returns:
        dict: ``filename`` (final path component), ``path`` (the path as a
        string), ``size`` (``st_size``, in bytes) and ``modified_date``
        (``st_mtime`` rendered in local time as ``YYYY-MM-DD HH:MM:SS``).

    Raises:
        OSError: Propagated from ``stat()`` if the file cannot be inspected.
    """
    # Query the filesystem once: size and mtime then come from the same
    # snapshot, and the catalog needs one stat call per file instead of two.
    file_stat = notebook_path.stat()
    modified = datetime.fromtimestamp(file_stat.st_mtime)
    return {
        "filename": notebook_path.name,
        "path": str(notebook_path),
        "size": file_stat.st_size,
        "modified_date": modified.strftime('%Y-%m-%d %H:%M:%S'),
    }

In [5]:
# Create a Pandas DataFrame to store the Jupyter notebook metadata and write it to a CSV file.
notebook_metadata = [get_notebook_metadata(nb) for nb in notebooks]
# Name the columns explicitly: if no notebooks were found the frame would
# otherwise have no columns at all, and any later metadata_df["size"] lookup
# would raise KeyError instead of yielding an empty result.
metadata_df = pd.DataFrame(
    notebook_metadata,
    columns=["filename", "path", "size", "modified_date"],
)
# index=False keeps the positional row index out of the exported file.
metadata_df.to_csv("ds-jupyter-notebooks.csv", index=False)

In [6]:
# Preview the first five rows of the notebook catalog held in metadata_df.
metadata_df.head(n=5)

Unnamed: 0,filename,path,size,modified_date
0,DS-Notebook-Catalog.ipynb,DS-Notebook-Catalog.ipynb,8152,2023-05-15 18:01:55
1,DS-Data-Catalog.ipynb,DS-Data-Catalog.ipynb,9983,2023-05-27 07:31:08
2,DS-Video-Catalog.ipynb,DS-Video-Catalog.ipynb,3933,2023-05-15 18:03:29
3,DS108Questions.ipynb,DS108-Databases/DS108Questions.ipynb,3810,2021-12-27 09:18:13
4,DS108NoSQL-L3-Updating-Documents.ipynb,DS108-Databases/NoSQL/DS108NoSQL-L3-Updating-D...,40601,2023-01-13 15:40:22


In [7]:
# Compute and print the total size of all Jupyter notebooks
# Sum the per-file byte counts, then express the total in megabytes
# (1 MB is taken as 1024 * 1024 bytes here).
total_size_bytes = metadata_df.loc[:, "size"].sum()
total_size_mb = total_size_bytes / (1024 * 1024)
print(f"Total size of all Jupyter Notebooks: {total_size_mb:.2f} MB")

Total size of all Jupyter Notebooks: 177.48 MB


In [8]:
# Count the entries in `notebooks` (the list returned by find_notebooks earlier
# in this notebook) and report the total.
total_notebooks = len(notebooks)
print(f"Total number of Jupyter Notebooks: {total_notebooks}")

Total number of Jupyter Notebooks: 765
