## Live coding - play with real data & Weaviate

Let's build our own instance of Weaviate, with real data!

We'll load the data from the Pro Git book<sup>*</sup> and go from there:

> \*The Pro Git book, written by Scott Chacon and Ben Straub and published by Apress, is licensed under the Creative Commons Attribution-NonCommercial-ShareAlike 3.0 license.
>
> - https://git-scm.com/book/en/v2
> - https://github.com/progit/progit2

In [None]:
def get_book_text_objects():
    """Download Pro Git book sections from GitHub as text objects.

    For each chapter listed below, the GitHub contents API is asked for the
    entries of the chapter's ``sections`` directory, and every entry of type
    ``file`` is downloaded through its ``download_url``.

    Returns:
        A list of dicts, one per section file, with the keys ``"body"``
        (the downloaded text), ``"chapter_title"`` (third-from-last path
        component of the download URL, i.e. the chapter directory name) and
        ``"filename"`` (last path component of the download URL).

    Raises:
        requests.HTTPError: If any request is answered with an error status
            (for example when the GitHub API rate limit is exceeded).
        requests.Timeout: If a request does not complete within the timeout.
    """
    import requests

    api_base_url = 'https://api.github.com/repos/progit/progit2/contents/book'  # Book base URL
    chapter_urls = ['/01-introduction/sections', '/02-git-basics/sections']  # Section directory of each chapter
    timeout_seconds = 30  # Without a timeout a stalled connection would block forever

    text_objs = []

    # Loop through book chapters
    for chapter_url in chapter_urls:
        # Get the JSON listing of the section files in the chapter
        response = requests.get(api_base_url + chapter_url, timeout=timeout_seconds)
        # An error reply is a JSON object rather than a list of entries; fail
        # here with a clear HTTP error instead of a confusing TypeError below.
        response.raise_for_status()

        # Loop through inner files (sections)
        for file_info in response.json():
            if file_info['type'] != 'file':  # Only process files (not directories)
                continue

            download_url = file_info['download_url']
            file_response = requests.get(download_url, timeout=timeout_seconds)
            # Never store an error page as if it were the section's text.
            file_response.raise_for_status()

            # Build objects including metadata derived from the URL path:
            # .../<chapter>/sections/<filename>
            url_parts = download_url.split('/')
            text_objs.append({
                "body": file_response.text,
                "chapter_title": url_parts[-3],
                "filename": url_parts[-1],
            })
    return text_objs


def load_text_files(data_dir="data/pro_git/"):
    """Load ``*.asc`` files stored on disk as text objects.

    Args:
        data_dir: Directory (string or path object) that is searched
            recursively for ``*.asc`` files. Defaults to ``"data/pro_git/"``,
            which is resolved relative to the current working directory.

    Returns:
        A list of dicts, one per file in sorted path order, with the keys
        ``"body"`` (the file's text), ``"chapter_title"`` (name of the
        directory that directly contains the file) and ``"filename"``.
    """
    from pathlib import Path

    # Sort the matches: rglob yields them in filesystem order, which differs
    # between machines and would make the result order non-reproducible.
    asc_files = sorted(
        path for path in Path(data_dir).rglob("*.asc") if path.is_file()
    )

    # Decode explicitly as UTF-8 instead of the platform's default encoding,
    # so non-ASCII text is read the same way on every system.
    # NOTE(review): assumes the stored files are UTF-8 encoded - confirm.
    return [
        {
            "body": fpath.read_text(encoding="utf-8"),
            "chapter_title": fpath.parent.name,
            "filename": fpath.name,
        }
        for fpath in asc_files
    ]


# Default: download the book sections from GitHub (needs network access).
text_objs = get_book_text_objects()
# Offline alternative: uncomment the next line to read local *.asc files instead.
# text_objs = load_text_files()