In [1]:
import os

import requests
from dotenv import load_dotenv
from openai import OpenAI
from pydantic import BaseModel, Field

# Load key/value pairs from a local .env file into the process environment so
# that credentials do not have to be written into the notebook itself.
load_dotenv()

True

In [2]:
def fetch_text_from_url(url: str, timeout: float = 60.0):
    """Fetch a web page as markdown through the Jina Reader proxy (r.jina.ai).

    The API token is read from the ``JINA_API_KEY`` environment variable
    instead of being embedded in the notebook.
    NOTE(review): the token that used to be hardcoded here was stored in
    plain text and should be revoked.

    Args:
        url: Address of the page to read; it is appended to the proxy URL.
        timeout: Client-side timeout in seconds passed to ``requests.get``,
            so a stalled connection cannot hang the notebook indefinitely.

    Returns:
        The decoded JSON body of the response. Callers in this notebook read
        ``["data"]["content"]``, ``["data"]["title"]`` and ``["data"]["url"]``.

    Raises:
        RuntimeError: If ``JINA_API_KEY`` is not set.
        requests.HTTPError: If the service answers with an error status.
    """
    api_key = os.environ.get("JINA_API_KEY")
    if not api_key:
        raise RuntimeError(
            "JINA_API_KEY is not set; add it to your environment or .env file."
        )

    headers = {
        "Accept": "application/json",
        "Authorization": f"Bearer {api_key}",
        "X-Return-Format": "markdown",
        # Presumably the server-side fetch budget in seconds — confirm against
        # the Jina Reader documentation.
        "X-Timeout": "30",
    }

    response = requests.get(
        f"https://r.jina.ai/{url}", headers=headers, timeout=timeout
    )
    # Fail loudly on 4xx/5xx instead of trying to decode an error page as JSON.
    response.raise_for_status()
    return response.json()

In [3]:
# The question that drives concept extraction further down.
QUERY = "How to build a digital brain?"

# Source articles; a document's position in this list is its index.
DOCS = [
    "https://fortelabs.com/blog/basboverview/",
    "https://zettelkasten.de/overview/",
    "https://blog.alexanderfyoung.com/the-perfect-mindmap-4-step-framework/",
    "https://www.goodnotes.com/blog/note-taking-methods",
]

# Full JSON payloads, kept because the title and url are read from them later.
fetched_docs = list(map(fetch_text_from_url, DOCS))

# Only the page text of each payload, in the same order as DOCS.
parsed_docs = [payload["data"]["content"] for payload in fetched_docs]

In [4]:
# Shared OpenAI client used by get_document_concepts and get_document_summary.
# No arguments are passed, so credentials are presumably picked up from the
# environment — confirm.
llm = OpenAI()


# Structured-output schema for a single extracted concept.
# `#` comments are used instead of a class docstring so the notes stay out of
# the JSON schema that is generated from this model.
class Concept(BaseModel):
    # Display name; also used as the concept's node id when the graph is built.
    name: str
    description: str = Field(
        ..., description="A short one sentence description of the concept."
    )
    # Positions of the source documents that mention this concept. They are
    # zero-based to line up with the enumerate() indices used for the
    # "Article_<index>" graph nodes.
    exists_in_documents: list[int] = Field(
        ...,
        description="The zero-based indices of the documents in which this concept exists.",
    )


# Top-level structured-output schema: every concept extracted from the whole
# document set. Passed as `response_format` in get_document_concepts.
class DocumentSetConcepts(BaseModel):
    concepts: list[Concept]


# Prompt variant that asks for query-relevant concepts shared between documents.
# It used to be assigned to SYSTEM_PROMPT and then immediately overwritten, so
# it never took effect; it is kept under its own name to make that explicit.
SYSTEM_PROMPT_RELEVANCE = """Given a user QUERY and a set of DOCUMENTS - you must parse RELEVANT high level concepts from the documents,
which would be useful in answering and/or exploring the user's QUERY.
There should be overlapping concepts between the documents which help tie them together.
"""

# Prompt variant that asks for abstract concepts suited to a mindmap.
SYSTEM_PROMPT_MINDMAP = """Given a user QUERY and a set of DOCUMENTS - your goal is to extract concepts which can be used to create a mindmap.
We want to create relationships between high level abstract concepts, that would aid information absorption by the human brain.
"""

# The prompt actually sent by get_document_concepts; switch variants here.
SYSTEM_PROMPT = SYSTEM_PROMPT_MINDMAP


def get_document_concepts(query, docs):
    """Ask the model for the concepts found across a set of documents.

    Each document is sent under a ``DOCUMENT <index>:`` header (zero-based) so
    the model can tell where one document ends and the next begins, and can
    report document indices that match positions in ``docs``.

    Args:
        query: The user's question, placed at the top of the user message.
        docs: Sequence of document texts; a text's position is its index.

    Returns:
        The ``parsed`` attribute of the first completion choice, produced by
        the client from ``response_format=DocumentSetConcepts``.
    """
    # Built outside the f-string below: a backslash inside an f-string
    # replacement field is a SyntaxError on Python versions before 3.12.
    labelled_docs = "\n\n".join(
        f"DOCUMENT {index}:\n{doc}" for index, doc in enumerate(docs)
    )

    completion = llm.beta.chat.completions.parse(
        model="gpt-4o-2024-08-06",
        messages=[
            {
                "role": "system",
                "content": SYSTEM_PROMPT,
            },
            {
                "role": "user",
                "content": f"The QUERY: {query}\n The DOCUMENTS:\n{labelled_docs}",
            },
        ],
        response_format=DocumentSetConcepts,
    )

    return completion.choices[0].message.parsed


# Structured-output schema holding a single summary string. Passed as
# `response_format` in get_document_summary.
class DocumentSummary(BaseModel):
    summary: str


def get_document_summary(query, doc):
    """Request a short, query-conditioned summary of one document.

    Args:
        query: The user's question the summary should be conditioned on.
        doc: Text of the document to summarise.

    Returns:
        The ``parsed`` attribute of the first completion choice, produced by
        the client from ``response_format=DocumentSummary``.
    """
    system_message = {
        "role": "system",
        "content": "Generate a short summary of the provided DOCUMENT which is conditioned on the user's QUERY.",
    }
    user_message = {
        "role": "user",
        "content": f"The QUERY:\n{query}\nThe DOCUMENT:\n{doc}",
    }

    response = llm.beta.chat.completions.parse(
        model="gpt-4o-2024-08-06",
        messages=[system_message, user_message],
        response_format=DocumentSummary,
    )

    first_choice = response.choices[0]
    return first_choice.message.parsed


In [5]:
# Extract the concepts for QUERY and keep only the plain list of concept dicts
# (keys: name, description, exists_in_documents) for the graph-building step.
concepts = get_document_concepts(QUERY, parsed_docs).model_dump()["concepts"]
# Per-document summaries are currently disabled:
# summaries = [get_document_summary(QUERY, d) for d in parsed_docs]

In [6]:
import networkx as nx
from pyvis.network import Network

# One metadata record per fetched article; `index` is the article's position
# in `fetched_docs`.
documents = [
    {"index": i, "title": d["data"]["title"], "url": d["data"]["url"]}
    for i, d in enumerate(fetched_docs)
]

# Document index -> graph node id, used to validate model-supplied indices.
article_nodes = {doc["index"]: f"Article_{doc['index']}" for doc in documents}

G = nx.Graph()

# Article nodes: blue boxes labelled with the page title.
for doc in documents:
    G.add_node(
        article_nodes[doc["index"]],
        label=doc["title"],
        title=doc["title"],
        shape="box",
        color="#1f78b4",
        url=doc["url"],
    )

# Concept nodes (green ellipses) and their links to the articles they occur in.
for concept in concepts:
    concept_name = concept["name"]
    G.add_node(
        concept_name,
        label=concept_name,
        title=concept["description"],
        shape="ellipse",
        color="#33a02c",
    )
    for doc_index in concept["exists_in_documents"]:
        article_node = article_nodes.get(doc_index)
        if article_node is None:
            # The indices come from the model and may not match any document.
            # add_edge would silently create an unlabeled node for such an
            # index, so the link is skipped and reported instead.
            print(
                f"Skipping unknown document index {doc_index!r} "
                f"for concept {concept_name!r}"
            )
            continue
        G.add_edge(concept_name, article_node)

# Render the graph to a standalone interactive HTML file.
net = Network(height="750px", width="100%", bgcolor="#f0f0f0", font_color="black")
net.from_nx(G)
net.force_atlas_2based()
net.show_buttons(filter_=["physics"])
net.save_graph("concepts_articles_graph.html")

In [7]:
concepts

[{'name': 'Digital Brain',
  'description': 'An external system for organizing information to enhance memory and creativity.',
  'exists_in_documents': [0]},
 {'name': 'Second Brain Methodology',
  'description': 'A method for developing a digital brain using tools and frameworks to manage information.',
  'exists_in_documents': [0]},
 {'name': 'Zettelkasten Method',
  'description': 'A note-taking and organization system to create a web of connected knowledge.',
  'exists_in_documents': [1, 3]},
 {'name': 'Mind Mapping',
  'description': 'A visual tool used for organizing and connecting knowledge in a spatial format.',
  'exists_in_documents': [2, 3]},
 {'name': 'Note-Taking Techniques',
  'description': 'Various methods to take and organize notes for effective learning and recall.',
  'exists_in_documents': [3]},
 {'name': 'Memory Retention',
  'description': 'The ability to effectively recall information by organizing and connecting it in meaningful ways.',
  'exists_in_documents': 

In [8]:
documents

[{'index': 0,
  'title': 'Building a Second Brain: The Definitive Introductory Guide',
  'url': 'https://fortelabs.com/blog/basboverview/'},
 {'index': 1,
  'title': 'Getting Started • Zettelkasten Method',
  'url': 'https://zettelkasten.de/overview/'},
 {'index': 2,
  'title': 'The Perfect Mind Map - 4 Step Framework',
  'url': 'https://blog.alexanderfyoung.com/the-perfect-mindmap-4-step-framework/'},
 {'index': 3,
  'title': 'The Best Note-Taking Methods for College Students & Serious Note-takers | Goodnotes Blog',
  'url': 'https://www.goodnotes.com/blog/note-taking-methods'}]