In [1]:
import pandas as pd
import sys
# NOTE(review): this appends the filesystem root to the module search path.
# The utils_* modules imported below are presumably project-local files that
# live next to (or above) this notebook — confirm the intended directory.
sys.path.append('/')
from utils_processing import transform_counts_to_calendar
from utils_plot import plot_bar_time_series, create_map, create_tree

In [2]:
# Load the reading log; the genre and map cells below read from this frame.
df_0 = pd.read_excel("../data/book_record.xlsx")

# Bar chart of the 'book' records over the 'dates_read' period.
# save_path=None — presumably renders without writing a file; confirm in utils_plot.
plot_bar_time_series(
    df_0, select_type='book', time_period='dates_read', title='Books Read', save_path=None
)

In [3]:
# Builds df_genre_counts: one row per (genre, Fiction/Nonfiction) pair with a
# weighted book count and the list of 10-rated titles carrying that genre.

# --- STEP 1: Parse each book's genre list once ---
# Books without a genre are left out of every genre statistic below. Parsing
# happens on the filtered frame only, so a missing genre can never reach
# `.split` (the previous top-titles filter split the unfiltered column).
books_with_genre = df_0.dropna(subset=['genre'])
df_genres = books_with_genre['genre'].apply(lambda x: [g.strip() for g in x.split(',')])

# --- STEP 2: Collect weighted counts by genre and by type ---
# A book tagged with k genres contributes 1/k to each of them.
if 'type' in books_with_genre.columns:
    book_types = books_with_genre['type']
else:
    book_types = [None] * len(books_with_genre)

genre_type_weights = {}   # genre -> {'Fiction': weight, 'Nonfiction': weight}
for genres, book_type in zip(df_genres, book_types):
    weight = 1 / len(genres)   # split(',') always yields at least one entry
    for g in genres:
        weights = genre_type_weights.setdefault(g, {'Fiction': 0.0, 'Nonfiction': 0.0})
        # Books whose type is missing or unrecognised register the genre but add no weight.
        if book_type in ('Fiction', 'Nonfiction'):
            weights[book_type] += weight

# --- STEP 3: Compute global totals ---
total_fiction = sum(v['Fiction'] for v in genre_type_weights.values())
total_nonfiction = sum(v['Nonfiction'] for v in genre_type_weights.values())
total = total_fiction + total_nonfiction

# Labels written to the 'flag' column (also used as colour keys by the tree cell).
fiction_flag = 'Fiction'
nonfiction_flag = 'Nonfiction'

# --- STEP 4: One row per (genre, type) with a non-zero weighted count ---
rows = [
    {'genre': g, 'count': w[label], 'flag': flag}
    for g, w in genre_type_weights.items()
    for label, flag in (('Fiction', fiction_flag), ('Nonfiction', nonfiction_flag))
    if w[label] > 0
]

# Explicit columns keep the frame usable even when no row qualified.
df_genre_counts = pd.DataFrame(rows, columns=['genre', 'count', 'flag'])

# --- STEP 5: Top titles (rating == 10), repeated on each row of the genre ---
# Index titles by genre in a single pass instead of re-splitting every genre
# string once per output row.
is_top = books_with_genre['rating'].eq(10).to_numpy()
top_titles = {}
for title, genres in zip(books_with_genre.loc[is_top, 'title'], df_genres[is_top]):
    for g in set(genres):   # a genre repeated within one cell lists the title once
        top_titles.setdefault(g, []).append(title)

# Each row gets its own list so editing one row's list cannot affect another.
df_genre_counts['top'] = df_genre_counts['genre'].apply(
    lambda g: list(top_titles.get(g, []))
)

df_genre_counts.sort_values(by='count', ascending=False)

Unnamed: 0,genre,count,flag,top
9,Science Fiction,50.0,Fiction,"[The Dark Forest, Death's End, Stories of Your..."
1,Philosophy,36.833333,Nonfiction,"[A History of Western Philosophy, The Social C..."
8,Science,31.166667,Nonfiction,"[The Universe in a Nutshell, A Brief History o..."
3,History,18.833333,Nonfiction,"[A History of Western Philosophy, The Art of G..."
4,Sociology,15.5,Nonfiction,[The Social Contract]
6,Classic,12.0,Fiction,"[The Left Hand of Darkness, The Dispossessed, ..."
16,Speculative Fiction,11.0,Fiction,"[Stories of Your Life and Others, Childhood's ..."
5,Architecture,6.833333,Nonfiction,"[The Art of Gothic: Architecture, Sculpture, P..."
11,Thriller,4.0,Fiction,[]
10,Young Adult,3.5,Fiction,[]


In [6]:
# Styling for the genre tree: one colour per fiction/nonfiction flag, plus font sizes.
tree_colors = {fiction_flag: 'royalblue', nonfiction_flag: 'gold'}
tree_fonts = {'base': 12, 'title': 16, 'tree_text': 12}

# Draw the tree from the weighted genre counts and write it to an HTML file.
# Swap save_path to None to (presumably) skip writing the file — confirm in utils_plot.
create_tree(
    df=df_genre_counts, feat='genre', var='count', flag='flag',
    threshold=5, threshold_global=False, group_flag=False,
    color_dict=tree_colors,
    font_size_dict=tree_fonts,
    save_path='../attachments/tree_book.html',
)

In [5]:
# Column of the transformed frame that the map colours by.
VAR = 'total_books'
df = transform_counts_to_calendar(df_0)

# Bin edges for the colour scale; the last edge is the largest observed value.
# NOTE(review): if that maximum is <= 30 the edges stop increasing — confirm
# create_map tolerates this.
map_bins = [0, 1, 10, 30, df[VAR].max()]

# Alternative save_path for exporting the map: '../attachments/map_books.html'
create_map(
    df, var=VAR, code_convention='code3', bins=map_bins,
    color='royalblue', projection_type="orthographic", tooltip_mode='raw',
    save_path=None,
)