In [3]:
# Convert any Notion database into a pandas DataFrame or a CSV file ready for analysis
!pip install notion-client

import os
import logging
import collections
import pandas as pd

from notion_client import Client, APIErrorCode, APIResponseError
from pprint import pprint

# Shared Notion API client; the integration token is read from the
# NOTION_TOKEN environment variable (a KeyError is raised if it is not set).
notion = Client(auth=os.environ["NOTION_TOKEN"])

You should consider upgrading via the '/root/venv/bin/python -m pip install --upgrade pip' command.[0m


In [2]:
def get_all_db_entries(db_id):
    """Query a Notion database and collect the results of every page.

    Uses the module-level ``notion`` client. The first response is used as the
    accumulator: results from the following pages are appended to its
    "results" list.

    Args:
        db_id: ID of the Notion database to query.

    Returns:
        The first query response dict with "results" holding the entries of
        all pages, and "has_more" / "next_cursor" updated to the values of the
        last response received.
    """
    db = notion.databases.query(database_id=db_id)

    # Stop as well when no cursor is supplied: without one we could only
    # re-request the first page over and over.
    while db["has_more"] and db["next_cursor"] is not None:
        more_results = notion.databases.query(
            database_id=db_id, start_cursor=db["next_cursor"]
        )
        db["results"].extend(more_results["results"])
        # Keep the pagination metadata in sync with what has been fetched
        db["next_cursor"] = more_results["next_cursor"]
        db["has_more"] = more_results["has_more"]

    return db

In [98]:
def get_notion_page_title(page_id):
    """Retrieve a Notion page and return its title as plain text.

    Uses the module-level ``notion`` client to fetch the page.

    Args:
        page_id: ID of the page to retrieve.

    Returns:
        The plain text of all title segments joined together ('' when the
        title list is empty), or None when no property has the id "title".
    """
    notion_page = notion.pages.retrieve(page_id)
    for value in notion_page["properties"].values():
        if value["id"] == "title":
            # The title is a list of text segments: join them all instead of
            # reading only the first one, which fails on an empty list.
            return "".join(part["plain_text"] for part in value["title"])
    return None

In [99]:
def get_notion_page_icon(page_id):
    """Retrieve a Notion page and return its icon.

    Uses the module-level ``notion`` client to fetch the page.

    Args:
        page_id: ID of the page to retrieve.

    Returns:
        None when the page has no icon, the emoji character for an emoji
        icon, otherwise the URL of the icon image.
    """
    icon = notion.pages.retrieve(page_id)["icon"]
    if icon is None:
        return None
    if icon["type"] == "emoji":
        return icon["emoji"]
    # Image icons keep their URL under a key named after their type
    # ("file" or "external"), so look it up by type rather than assuming "file".
    return icon[icon["type"]]["url"]

In [100]:
def serialize_notion_page_icon(icon_field_value_obj):
    """Serialize the "icon" field of a page object into a string.

    Args:
        icon_field_value_obj: The page's icon object, or None when the page
            has no icon.

    Returns:
        '' when there is no icon, the emoji character for an emoji icon,
        otherwise the URL of the icon image.
    """
    if icon_field_value_obj is None:
        return ''
    icon_type = icon_field_value_obj["type"]
    if icon_type == "emoji":
        return icon_field_value_obj["emoji"]
    # Image icons keep their URL under a key named after their type
    # ("file" or "external"), so look it up by type rather than assuming "file".
    return icon_field_value_obj[icon_type]["url"]

In [101]:
def serialize_notion_page_title(page_properties):
    """Extract the page title, as plain text, from a page's properties.

    Args:
        page_properties: The "properties" dict of a page object.

    Returns:
        The plain text of all title segments joined together ('' when the
        title list is empty), or None when no property has the id "title".
    """
    for value in page_properties.values():
        if value["id"] == "title":
            # The title is a list of text segments: join them all instead of
            # reading only the first one, which fails on an empty list.
            return "".join(part["plain_text"] for part in value["title"])
    return None

In [102]:
def serialize_notion_relations(relation_field_value_obj, show_icon=True):
    """Serialize a relation property value into a list of related page labels.

    Each related page is looked up through get_notion_page_icon /
    get_notion_page_title.

    Args:
        relation_field_value_obj: Property value whose "relation" key holds a
            list of {"id": <page id>} references.
        show_icon: When True, prefix the title with the page's emoji icon.
            Image icons (URLs) are never included in the label.

    Returns:
        One label per related page: "<emoji> <title>" when an emoji icon is
        shown, otherwise just the title.
    """
    relations = []

    for relation in relation_field_value_obj["relation"]:
        # The icon is only needed when it is going to be displayed
        icon = get_notion_page_icon(relation["id"]) if show_icon else None
        title = get_notion_page_title(relation["id"])
        #If there is no icon to show, or it is an image, only serialize the title
        if icon is None or "https://" in icon:
            relations.append(title)
        #Otherwise serialize a combination of Icon + Title
        else:
            relations.append(icon + " " + title)

    return relations

In [103]:
def serialize_notion_multiselect(multiselect_field_value_obj):
    """Return the names of the options selected in a multi-select property value.

    Args:
        multiselect_field_value_obj: Property value whose "multi_select" key
            holds a list of option objects, each with a "name".

    Returns:
        List of option names, in the order they appear in the property value.
    """
    return [
        option["name"]
        for option in multiselect_field_value_obj["multi_select"]
    ]

In [104]:
def serialize_notion_files(files_field_value_obj):
    """Return the URL of every file attached to a files property value.

    Args:
        files_field_value_obj: Property value whose "files" key holds a list
            of file objects.

    Returns:
        List of URLs: taken from the "external" entry for files of type
        'external', and from the "file" entry for every other type.
    """
    def _file_url(entry):
        location_key = "external" if entry["type"] == 'external' else "file"
        return entry[location_key]["url"]

    return list(map(_file_url, files_field_value_obj["files"]))

In [105]:
def serialize_people(people_field_value_obj):
    """Return a display value for every user in a people property value.

    Args:
        people_field_value_obj: Property value whose "people" key holds a list
            of user objects.

    Returns:
        List with each user's "name"; for user objects that carry no "name"
        key, the user's "id" is used instead (None if that is absent too).
    """
    people = []

    for person_obj in people_field_value_obj["people"]:
        if "name" in person_obj:
            people.append(person_obj["name"])
        else:
            # The Notion API can return partial user objects (only "object"
            # and "id"); fall back to the id rather than raising KeyError.
            people.append(person_obj.get("id"))

    return people

In [106]:
#Page Object Properties parsing
def parse_page_properties(properties_obj, relations=True, show_icon=True):
    """Flatten the "properties" of a Notion page into a plain dict.

    Args:
        properties_obj: The "properties" dict of a page object, mapping
            property names to typed property values.
        relations: When True, relation properties are resolved and included;
            when False they are left out.
        show_icon: Forwarded to serialize_notion_relations.

    Returns:
        Dict mapping property name to its serialized value. The title
        property and any property type not handled below are omitted.
    """
    obj = {}

    #TODO
    #Missing: rollup, formula and content of page with block children

    for key, value in properties_obj.items():
        prop_type = value['type']

        if prop_type == "title":
            #title is extracted from the page metadata
            continue
        elif prop_type == "rich_text":
            #Join every text segment; an empty list naturally yields ''
            obj[key] = "".join(part["plain_text"] for part in value["rich_text"])
        elif prop_type == "select":
            obj[key] = '' if value["select"] is None else value["select"]["name"]
        elif prop_type == "number":
            #Compare against None explicitly so that 0 is kept as a number
            obj[key] = '' if value["number"] is None else value["number"]
        elif prop_type in ("url", "email", "phone_number", "checkbox"):
            #Plain values stored under a key named after the property type
            obj[key] = value[prop_type]
        elif prop_type == "created_by":
            user = value["created_by"]
            #Partial user objects carry no "name": fall back to the user id
            obj[key] = user["name"] if "name" in user else user.get("id")
        elif prop_type == "multi_select":
            obj[key] = serialize_notion_multiselect(value)
        elif prop_type == "files":
            obj[key] = serialize_notion_files(value)
        elif prop_type == "people":
            obj[key] = serialize_people(value)
        elif prop_type == "relation" and relations:
            obj[key] = serialize_notion_relations(value, show_icon)

    return obj

def parse_notion_page(entry, relations=True, show_icon=True):
    """Serialize a Notion page object into a single flat, ordered mapping.

    Args:
        entry: A page object, as found in a database query's "results".
        relations: Forwarded to parse_page_properties.
        show_icon: Forwarded to parse_page_properties.

    Returns:
        collections.OrderedDict with the page metadata (title, icon,
        created_time, last_edited_time, page_url) first, followed by the
        serialized properties. A property sharing a metadata key replaces
        that key's value.
    """
    page_properties = entry["properties"]

    #Page-level metadata goes first
    serialized_page = collections.OrderedDict()
    serialized_page["title"] = serialize_notion_page_title(page_properties)
    serialized_page["icon"] = serialize_notion_page_icon(entry["icon"])
    serialized_page["created_time"] = entry["created_time"]
    serialized_page["last_edited_time"] = entry["last_edited_time"]
    serialized_page["page_url"] = entry["url"]

    #Then the properties, which are unique to each database
    serialized_page.update(
        parse_page_properties(page_properties, relations, show_icon)
    )

    return serialized_page

def notion_db_to_df(db_id, relations=True, show_icon=True):
    """Load a whole Notion database into a pandas DataFrame.

    Args:
        db_id: ID of the Notion database to load.
        relations: Forwarded to parse_notion_page.
        show_icon: Forwarded to parse_notion_page.

    Returns:
        DataFrame with one row per database entry, built from the mappings
        returned by parse_notion_page.
    """
    entries = get_all_db_entries(db_id)["results"]
    rows = [parse_notion_page(entry, relations, show_icon) for entry in entries]
    return pd.DataFrame(rows)

def notion_to_csv(db_id, name='notion_table',relations=True, show_icon=True):
    """Export a Notion database to a CSV file.

    Args:
        db_id: ID of the Notion database to export.
        name: Output path without extension; '.csv' is appended to it.
        relations: Forwarded to notion_db_to_df.
        show_icon: Forwarded to notion_db_to_df.

    Returns:
        None. The file is written without the DataFrame index column.
    """
    notion_db_to_df(db_id, relations, show_icon).to_csv(name + '.csv', index=False)

In [107]:
#Taxonomy Table: 2fbdd8aba1604e2385ed7be3a59d1984
#Concepts Table: 763f8356f41c45e3934950696f00fa21
#Movies 4870a2649b6c4d3784fc8a24196ea690
#Podcasts c1f6b77ce50a47b7b1e524c923079c17

MOVIES_DB_ID = "4870a2649b6c4d3784fc8a24196ea690"

#Build the DataFrame once and write the CSV from it, so the database (and
#every related page) is fetched from the Notion API a single time.
df = notion_db_to_df(MOVIES_DB_ID)
df.to_csv("notion_table.csv", index=False)
df

Unnamed: 0,title,icon,created_time,last_edited_time,page_url,Link To Watch,Period,Category,IMDB Score,Type,...,IMDB,Country,My Score,JustWatch,Concepts,Platform,Why watch it,Country Based,Categories,Main Characters Economic Class
0,Amor - Filmin,,2021-12-29T08:34:00.000Z,2021-12-29T08:34:00.000Z,https://www.notion.so/Amor-Filmin-1c0851aa02bb...,https://www.filmin.es/catalogo/tema/las-favori...,[],[],,,...,,[],,,[],[],,[],[],[]
1,Yesterday (2019),,2021-12-29T08:34:00.000Z,2022-01-02T21:29:00.000Z,https://www.notion.so/Yesterday-2019-352310c28...,https://www.imdb.com/title/tt8079248/?ref_=nv_...,[2020],[],6.8,Movie,...,https://www.imdb.com/title/tt8079248/?ref_=nv_...,[],6,,[],[],,"[United States 🇺🇸, United Kingdom 🇬🇧]",[],[]
2,Gattaca,,2021-12-29T08:34:00.000Z,2022-01-02T21:29:00.000Z,https://www.notion.so/Gattaca-40a619a02b8347c6...,,[],[],,Movie,...,,[],7,,[],[],,[],[],[]
3,The Darjeeling Limited (2007),,2021-12-29T08:34:00.000Z,2021-12-29T08:34:00.000Z,https://www.notion.so/The-Darjeeling-Limited-2...,https://www.imdb.com/title/tt0838221/?ref_=ref...,[],[],7.2,,...,https://www.imdb.com/title/tt0838221/?ref_=ref...,[],,,[],[Apple TV],,[],[],[]
4,Lo que arde (2019),,2021-12-29T08:34:00.000Z,2021-12-29T08:34:00.000Z,https://www.notion.so/Lo-que-arde-2019-4ff9924...,https://www.imdb.com/title/tt8212958/,[2020],[],6.8,Movie,...,https://www.imdb.com/title/tt8212958/,[Spain 🇪🇸],8,https://www.imdb.com/title/tt8212958/,[],[Apple TV],,[],[],[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
110,WALL·E (2008),,2021-12-29T08:34:00.000Z,2021-12-29T08:34:00.000Z,https://www.notion.so/WALL-E-2008-6c14201f2325...,,[],[],8.4,Movie,...,https://www.imdb.com/title/tt0910970/,[],9,,[☀️ Good Future Days - Utopia],[Disney +],,[],[],[]
111,The Young Karl Marx,,2021-12-29T08:34:00.000Z,2021-12-29T08:34:00.000Z,https://www.notion.so/The-Young-Karl-Marx-bde8...,,[1850],[],6.6,Movie,...,https://www.imdb.com/title/tt1699518/,[],7,,[🇧🇳 Capitalism vs Socialism],[Filmin],,"[France 🇫🇷, United Kingdom 🇬🇧, Germany 🇩🇪]",[],[]
112,Mapa,,2021-12-29T08:34:00.000Z,2021-12-29T08:34:00.000Z,https://www.notion.so/Mapa-f60a9ed87cba452dac4...,https://www.filmin.es/pelicula/mapa,[],[💑 Love],,Movie,...,,[],8,,[💔 Sentimental Breakup],[],Very original and real docu movie from with re...,[],[],[]
113,Her,,2021-12-29T08:34:00.000Z,2022-01-02T21:48:00.000Z,https://www.notion.so/Her-254ff6e4c4d74af4bfe5...,,[],[💑 Love],8,Movie,...,https://www.imdb.com/title/tt1798709/,[],10,,[💔 Sentimental Breakup],[],Deep emotions expressed in believable dystopia...,[],[],[]


In [94]:
#Test Parse Notion Page
#Manual check: retrieve a single page and pretty-print the raw API response.

#Sample page ids:
#Meaningful Life 335a46256271479f99551702c34bacae
#Fact Example e5c98a65ed91442682a3edf03889cda9
#entry = notion_db["results"][1]


entry = notion.pages.retrieve('9599cb93d2eb42878465272bf7ed6135')
pprint(entry)

#Uncomment one of the lines below to inspect the serialized form of `entry`:
#parse_page_properties(entry["properties"])
#parse_page_properties(entry["properties"], relations=True)
#pprint(parse_notion_page(entry))
#pprint(parse_notion_page(entry, relations=False))

{'archived': False,
 'cover': None,
 'created_time': '2021-12-29T08:36:00.000Z',
 'icon': None,
 'id': '9599cb93-d2eb-4287-8465-272bf7ed6135',
 'last_edited_time': '2021-12-29T08:36:00.000Z',
 'object': 'page',
 'parent': {'database_id': 'c1f6b77c-e50a-47b7-b1e5-24c923079c17',
            'type': 'database_id'},
 'properties': {'Category': {'id': 'q*x2',
                             'relation': [{'id': '127ebba8-63e3-4648-b1b8-167ac1ae58f4'},
                                          {'id': '269bbe99-b440-42fa-875f-530267ac343b'}],
                             'type': 'relation'},
                'Concepts': {'id': '7%2BAD',
                             'relation': [],
                             'type': 'relation'},
                'Name': {'id': 'title',
                         'title': [{'annotations': {'bold': False,
                                                    'code': False,
                                                    'color': 'default',
                          

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=4846e004-81b1-4f92-ba55-ed76244bfec5' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>