In [1]:
import os, fnmatch
import requests
import markdownify
import re
import json
from bs4 import BeautifulSoup
import shutil

In [2]:
# Course code; used as the name of the output subfolder (output/<code>).
code = "AI-3016"
# Which learning module to export: either a "learning_module" value from
# LearningPaths.json, or "all" to export every module.
learn_module = "all"

In [3]:
def get_markdown(url, savelocation):
    """Download a page, convert its main content section to Markdown and save it.

    The element with id ``unit-inner-section`` is extracted from the page,
    stripped of metadata lists, XP tags, "next section" blocks and ``<code>``
    elements, converted with ``markdownify`` and written to ``savelocation``
    as UTF-8.

    Args:
        url: Address of the page to retrieve.
        savelocation: Path of the Markdown file to write (overwritten if present).

    Returns:
        The generated Markdown text.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If the page has no element with id ``unit-inner-section``.
    """
    print("- Retrieving markdown from " + url)

    # A timeout keeps a single stalled request from hanging the whole run.
    response = requests.get(url, timeout=30)
    # Fail on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # might need to adapt this when working with other web pages (not Microsoft Learn)
    div = soup.find(id="unit-inner-section")
    if div is None:
        # find() returns None when nothing matches; report that clearly rather
        # than failing later with an AttributeError on None.
        raise ValueError(
            f"No element with id 'unit-inner-section' found at {url}"
        )

    # Drop page furniture that should not end up in the Markdown output.
    for metadata_list in div.find_all("ul", class_="metadata"):
        metadata_list.decompose()
    for xp_tag in div.find_all("div", class_="xp-tag"):
        xp_tag.decompose()
    for next_section in div.find_all("div", class_="next-section"):
        next_section.decompose()

    # Replace each heading's content with its plain text, prefixed by "# "
    # and wrapped in newlines.
    for header in div.find_all(["h1", "h2", "h3", "h4", "h5", "h6"]):
        header.string = "\n# " + header.get_text() + "\n"

    # <code> elements are removed entirely, including their text.
    for code_tag in div.find_all("code"):
        code_tag.decompose()

    markdown = markdownify.markdownify(str(div), heading_style="ATX", bullets="-")
    # Collapse runs of three or more newlines into a single blank line.
    markdown = re.sub('\n{3,}', '\n\n', markdown)
    markdown = markdown.replace("[Continue](/en-us/)", "")

    with open(savelocation, "w", encoding="utf-8") as file:
        file.write(markdown)

    return markdown

In [4]:
# Load the learning-path definitions. The encoding is given explicitly so the
# read does not depend on the platform default.
with open("./output/LearningPaths.json", "r", encoding="utf-8") as file:
    learning_paths = json.load(file)

# One output folder per course code; exist_ok makes re-running the cell safe.
os.makedirs(f"output/{code}", exist_ok=True)

for lp in learning_paths:
    # Keep only the requested module, or every module when learn_module is "all".
    modules = [
        module
        for module in lp["learning_modules"]
        if learn_module == "all" or module["learning_module"] == learn_module
    ]

    # Folders are numbered from 1 within each learning path.
    for iModule, module in enumerate(modules, start=1):
        outputFolder_module = f"output/{code}/{iModule}.{module['learning_module']}"
        os.makedirs(outputFolder_module, exist_ok=True)

        for url in module["learning_units"]:
            # The last path segment names the file; strip a trailing slash
            # first so it cannot produce an empty name.
            unit_name = url.rstrip("/").split("/")[-1]

            outputFile_md = f"{outputFolder_module}/{unit_name}.md"
            markdown = get_markdown(url, outputFile_md)

- Retrieving markdown from https://learn.microsoft.com/en-us/training/modules/introduction-to-azure-ai-studio/1-introduction
- Retrieving markdown from https://learn.microsoft.com/en-us/training/modules/introduction-to-azure-ai-studio/2-what-is-ai-studio
- Retrieving markdown from https://learn.microsoft.com/en-us/training/modules/introduction-to-azure-ai-studio/3-azure-ai-resources
- Retrieving markdown from https://learn.microsoft.com/en-us/training/modules/introduction-to-azure-ai-studio/4-when-to-use-ai-studio
- Retrieving markdown from https://learn.microsoft.com/en-us/training/modules/explore-models-azure-ai-studio/1-introduction
- Retrieving markdown from https://learn.microsoft.com/en-us/training/modules/explore-models-azure-ai-studio/2-select-model
- Retrieving markdown from https://learn.microsoft.com/en-us/training/modules/explore-models-azure-ai-studio/3-deploy-model
- Retrieving markdown from https://learn.microsoft.com/en-us/training/modules/explore-models-azure-ai-studio