In [None]:
import json
import networkx as nx
import re
import community as community_louvain
import shutil
from pathlib import Path
from netwulf import visualize

In [None]:
# JSON export of the notes database; it is loaded below and turned into the link graph.
roam_json_path = Path("~/Dropbox/roam-bak/roamnotes/json/roamwsy.json").expanduser()

In [None]:
# Directory the markdown files are copied from; each file is looked up as "<node name>.md".
source_markdown_path = Path("~/Dropbox/roam-bak/roamnotes/markdown/").expanduser()

In [None]:
# Destination directory that receives the markdown files of the selected community.
target_markdown_path = Path("~/Dropbox/obsidian").expanduser()/"subset_markdowns"

In [None]:
# Title of the page whose community is extracted; it is used as an exact key
# into the node -> community mapping, so it must match a graph node name.
query_term = "Roam Research"

In [None]:
def str_in_custom_filter(mystr, stopwords):
    """Tell whether ``mystr`` is matched in full by any stopword pattern.

    Args:
        mystr: The text to test (e.g. a page title).
        stopwords: Iterable of regular-expression patterns; each one is
            applied with ``re.fullmatch``, so it must cover the whole string.

    Returns:
        True if at least one pattern fully matches ``mystr``, else False.
    """
    return any(re.fullmatch(pattern, mystr) for pattern in stopwords)

In [None]:
def _iter_block_strings(node):
    """Yield every str stored under a ``'string'`` key anywhere inside ``node``.

    ``node`` may be any nesting of dicts and lists; other values are ignored.
    Strings are yielded in document order (a dict's own ``'string'`` first,
    then whatever is nested in its values).
    """
    if isinstance(node, dict):
        text = node.get('string')
        if isinstance(text, str):
            yield text
        for value in node.values():
            yield from _iter_block_strings(value)
    elif isinstance(node, list):
        for item in node:
            yield from _iter_block_strings(item)


def analyze_page(page, graph, custom_filter=False, stopwords=None):
    """Add an edge from a page to every ``[[link]]`` target found in its blocks.

    The text of each ``'string'`` entry nested anywhere in ``page`` is scanned
    directly. Scanning ``str(page)`` with a single regex (the previous
    approach) kept only the first link of a block and skipped blocks whose
    text contains an apostrophe, because ``repr`` then switches to double
    quotes.

    Args:
        page: Dict with a ``'title'`` key plus arbitrarily nested dicts/lists
            holding the block texts under ``'string'`` keys.
        graph: Object exposing ``add_edge(source, target)``; it is mutated in
            place.
        custom_filter: When true, pages and targets matching ``stopwords`` are
            left out of the graph.
        stopwords: Iterable of regex patterns passed to
            ``str_in_custom_filter``; ``None`` is treated as "no patterns".

    Raises:
        KeyError: If ``page`` has no ``'title'`` key.
    """
    source = page['title']
    targets = [
        target
        for text in _iter_block_strings(page)
        for target in re.findall(r"\[\[(.+?)\]\]", text)
    ]

    if custom_filter:
        if stopwords is None:
            # Avoid a TypeError when filtering is requested without patterns.
            stopwords = ()
        if str_in_custom_filter(source, stopwords):
            # The source itself is filtered out: contribute no edges at all.
            return
        targets = [
            target for target in targets
            if not str_in_custom_filter(target, stopwords)
        ]

    for target in targets:
        graph.add_edge(source, target)

In [None]:
# Read the export explicitly as UTF-8: open()'s default encoding depends on the
# platform/locale and can fail (or mis-decode) on notes with non-ASCII text.
with open(roam_json_path, encoding="utf-8") as f:
    data = json.load(f)

In [None]:
# Sanity check: number of top-level entries in the loaded export.
print(len(data))

In [None]:
# Directed link graph; analyze_page adds edges pointing from a page title to each linked title.
nx_graph = nx.DiGraph()

In [None]:
# Feed every entry of the export into the graph; the stopword filter is left off
# (custom_filter keeps its default of False).
for page in data:
    analyze_page(page, nx_graph)

In [None]:
# Undirected copy of the link graph; this is what community detection runs on.
bi_nx_graph = nx_graph.to_undirected()

In [None]:
# Louvain community detection; the result is indexed by node below to get each
# node's community id.
# NOTE(review): the partition may differ between runs unless a random_state is
# passed — confirm whether reproducibility matters here.
partition = community_louvain.best_partition(bi_nx_graph)

In [None]:
# Community id of the query page. This lookup fails if query_term is not a node
# of the graph (presumably with KeyError, assuming partition is a plain dict — verify).
clutser_query = partition[query_term]

In [None]:
# Accumulates the nodes that belong to the same community as the query page.
cluster_nodes = []

In [None]:
# Keep every node whose community id equals the query page's community id.
cluster_nodes.extend(
    member for member in bi_nx_graph if partition[member] == clutser_query
)

In [None]:
# Sanity check: number of nodes selected for the query page's community.
print(len(cluster_nodes))

In [None]:
# Create the output directory in one idempotent call. parents=True also creates
# missing parent directories (the plain mkdir() failed in that case), and
# exist_ok=True replaces the separate exists() check, which was race-prone.
target_markdown_path.mkdir(parents=True, exist_ok=True)

In [None]:
# Copy the markdown file of every node in the selected community into the
# target directory. The copy stays best-effort, but failures are no longer
# swallowed wholesale.
for node in cluster_nodes:
    mdfilename = f"{node}.md"
    source_file = source_markdown_path / mdfilename
    target_file = target_markdown_path / mdfilename
    try:
        if not source_file.is_file():
            # No markdown file exists for this node; skip it quietly, as the
            # original best-effort loop did.
            continue
        # A node name containing "/" yields a nested path, so make sure the
        # destination's parent directory exists before copying.
        target_file.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(source_file, target_file)
    except (OSError, ValueError) as err:
        # Report real failures (permissions, invalid names, ...) and move on.
        # Unlike a bare ``except``, this lets KeyboardInterrupt and SystemExit
        # propagate so the loop can be interrupted.
        print(f"Skipped {mdfilename!r}: {err}")

In [None]:
# Restrict the directed link graph to the nodes of the selected community.
sub_nx_graph = nx_graph.subgraph(cluster_nodes)

In [None]:
# Render the community subgraph with netwulf's visualize().
visualize(sub_nx_graph)