# Chrome Bookmarks

In [None]:
import logging
logging.basicConfig(level = logging.INFO)

from os import environ

from pymongo import MongoClient, errors
from pymongo.collection import Collection

from pathlib import Path
import json

In [None]:
# Handle to the 'chrome_raw' collection of the 'daily' database on the server
# named by the MONGO_URL environment variable.
mongo: Collection = MongoClient(environ["MONGO_URL"])['daily']['chrome_raw']

# Make sure the unique index on 'id' exists. This is written as an `if`
# statement rather than a bare `... and mongo.create_index(...)` expression:
# a notebook echoes the value of a cell's trailing expression, which is what
# printed the index name (or `False`) before.
if 'id_1' not in mongo.index_information():
    mongo.create_index([('id', 1)], unique=True)

In [None]:
# TODO: Handle massive amounts of bookmarks

# The bookmarks JSON file is expected at ./data/Bookmarks, relative to the
# current working directory.
bookmarks_file = Path.cwd() / 'data' / 'Bookmarks'

# Folder to import, given as slash-separated folder names in the
# CHROME_BOOKMARKS_PATH environment variable.
# - Indexing `environ` (instead of `.get`) makes a missing variable fail with
#   a KeyError that names it, rather than an AttributeError on None.
# - Leading/trailing slashes are stripped so they do not produce empty path
#   segments, which would never match a folder name.
folder_path = environ['CHROME_BOOKMARKS_PATH'].strip('/').split('/')

logging.info(f'Reading bookmarks from {bookmarks_file}...')

with bookmarks_file.open('r', encoding='utf-8') as file:
    bookmarks_data = json.load(file)

def find_folder(bookmarks, folder_path):
    """Return the children of the folder reached by following *folder_path*.

    Args:
        bookmarks: List of bookmark items (dicts). Each item is read via its
            'type' and 'name' keys; items whose 'type' is 'folder' are also
            read via 'children'.
        folder_path: Sequence of folder names to descend through, outermost
            first. It is only read, never modified, so callers do not need
            to pass a copy.

    Returns:
        The 'children' list of the final folder in *folder_path*; *bookmarks*
        itself when *folder_path* is empty; or an empty list when some name
        along the path has no matching folder. At each level only the first
        folder with a matching name is followed.
    """
    current_level = bookmarks
    # `or ()` keeps the "falsy path means return bookmarks" behaviour even
    # for a falsy non-sequence such as None.
    for folder_name in folder_path or ():
        for item in current_level:
            if item['type'] == 'folder' and item['name'] == folder_name:
                current_level = item['children']
                break
        else:
            # No folder with this name at the current level.
            return []
    return current_level

# Gather the contents of the configured folder from each bookmark root that
# is present in the file.
bookmarks_list = []
for root_key in ['bookmark_bar', 'other', 'synced']:
    root_bookmarks = bookmarks_data['roots'].get(root_key, {}).get('children', [])
    # Defensive copy, in case find_folder consumes the list it is given.
    bookmarks_list.extend(find_folder(root_bookmarks, folder_path.copy()))

logging.info(f'Logging {len(bookmarks_list)} bookmark(s)...')

# MongoDB server error code for a unique-index (duplicate key) violation.
_DUPLICATE_KEY_ERROR_CODE = 11000

if not bookmarks_list:
    # insert_many() rejects an empty document list, so skip the write when
    # the folder was not found (or is empty) instead of crashing.
    logging.warning("No bookmarks found; nothing to insert")
else:
    try:
        # ordered=False lets the remaining documents be attempted after one
        # of them fails (e.g. because it is already stored).
        mongo.insert_many(bookmarks_list, ordered=False)
    except errors.BulkWriteError as bulk_error:
        write_errors = bulk_error.details.get('writeErrors', [])
        has_other_failure = any(
            write_error.get('code') != _DUPLICATE_KEY_ERROR_CODE
            for write_error in write_errors
        ) or bool(bulk_error.details.get('writeConcernErrors'))
        if has_other_failure:
            # Only duplicates are expected; do not hide any other failure
            # behind the "duplicate" warning.
            raise
        logging.warning("Duplicate bookmark(s) detected")

In [None]:
import pandas as pd

In [None]:
# Materialise every document currently stored in the collection.
data = [*mongo.find()]

# Tabulate the documents and write them to bookmarks.csv in the working
# directory, leaving out the DataFrame's row index.
df = pd.DataFrame(data=data)
df.to_csv('bookmarks.csv', index=False)