In [1]:
import pandas as pd
import sys
sys.path.append('/')
from utils_processing import transform_verbose_to_calendar
from utils_plot import plot_countries_been_over_years, create_map, create_tree

In [2]:
# Load the verbose trip log and convert it with the project helper
# (save=False: nothing is written back to disk by this call).
df_verbose = pd.read_csv('../data/trips_verbose.csv')
df = transform_verbose_to_calendar(df_verbose, save=False)

# Drop the 'All' row (presumably an aggregate over every country — confirm
# against transform_verbose_to_calendar). The explicit .copy() makes `df` an
# independent frame, so adding columns to it in later cells does not raise
# pandas' SettingWithCopyWarning.
df = df[df['country'] != 'All'].copy()
plot_countries_been_over_years(df)

In [3]:
import pandas as pd
import country_converter as coco

# Attach a continent label to every country using country_converter.
df['continent'] = coco.convert(to='continent', names=df['country'])

# Show which continent labels were produced, then preview the frame.
print(df['continent'].unique())
df.head()

['Asia' 'Europe' 'America' 'Africa']


Unnamed: 0,country,total_days,1996,1997,1998,1999,2000,2001,2002,2003,...,2017,2018,2019,2020,2021,2022,2023,2024,2025,continent
1,China,6978,89,365,365,365,366,365,365,365,...,28,0,16,0,0,0,0,0,27,Asia
2,Sweden,1634,0,0,0,0,0,0,0,0,...,0,0,0,108,299,260,326,312,329,Europe
3,United States,911,0,0,0,0,0,0,0,0,...,243,257,0,0,0,0,0,0,0,America
4,United Kingdom,662,0,0,0,0,0,0,0,0,...,0,102,316,244,0,0,0,0,0,Europe
5,Canada,51,0,0,0,0,0,0,0,0,...,25,6,0,0,0,0,0,0,0,America


In [None]:
import plotly.io as pio

def create_tree(
        df,
        feat='genre',
        var='count',
        flag='flag',
        color_dict={'Movie': 'royalblue', 'TV': 'gold', 'Other': 'white'},
        threshold=0,
        threshold_global=True,
        save_path=None
        ):

    df_copy = df.copy()
    
    # Step 0: build top-level blocks
    top_rows = []
    other_rows = []
    
    if threshold_global:
        # Global aggregation
        large = df_copy[df_copy[var] >= threshold]
        small = df_copy[df_copy[var] < threshold]
        top_rows.append(large.assign(parent=""))
        if not small.empty:
            other_rows.append(small.assign(parent="Other"))  # children of Other
            top_rows.append(pd.DataFrame([{"id":"Other","label":"Other","parent":"","value":small[var].sum(),"flag":"Other"}]))
    else:
        # Per flag aggregation
        for f, group in df_copy.groupby(flag):
            large = group[group[var] >= threshold]
            small = group[group[var] < threshold]
            top_rows.append(large.assign(parent=""))
            if not small.empty:
                other_rows.append(small.assign(parent=f))
                top_rows.append(pd.DataFrame([{"id":f+" | Other","label":"Other","parent":"","value":small[var].sum(),"flag":f}]))

    # Combine top-level and other children
    df_top = pd.concat(top_rows, ignore_index=True)
    df_children = pd.concat(other_rows, ignore_index=True) if other_rows else pd.DataFrame(columns=df_top.columns)

    # Build id and label for top-level
    if 'id' not in df_top.columns:
        df_top['id'] = df_top[feat].astype(str)
    if 'label' not in df_top.columns:
        df_top['label'] = df_top[feat].astype(str)
    if 'value' not in df_top.columns:
        df_top['value'] = df_top[var]
    if 'parent' not in df_top.columns:
        df_top['parent'] = df_top.get('parent', "")

    # Children ids and labels
    if not df_children.empty:
        df_children['id'] = df_children[feat].astype(str) + "_" + df_children[var].astype(str)
        df_children['label'] = df_children[feat].astype(str)
        df_children['value'] = df_children[var]
        df_children['parent'] = df_children['parent'].apply(lambda x: "Other" if threshold_global else x + " | Other")

    # Final df for treemap
    df_final = pd.concat([df_top, df_children], ignore_index=True)
    
    # Step 1: wrap feat for top-level only
    min_line_length = 10
    max_line_length = 30
    count_min = df_final[var].min()
    count_max = df_final[var].max()
    norm_var = f'norm_{var}'
    df_final[norm_var] = (df_final[var] - count_min) / (count_max - count_min)
    df_final['line_length'] = df_final[norm_var] * (max_line_length - min_line_length) + min_line_length
    df_final['line_length'] = df_final['line_length'].astype(int)
    df_final['line_length'] = (df_final[norm_var] * (max_line_length - min_line_length) + min_line_length).fillna(min_line_length).astype(int)

    def wrap_feat(row):
        genre = str(row['label'])
        line_length = row['line_length']
        if len(genre) <= line_length:
            return genre
        words = genre.split(' ')
        lines = []
        current_line = ''
        for word in words:
            if len(current_line) + len(word) + (1 if current_line else 0) > line_length:
                if current_line:
                    lines.append(current_line)
                current_line = word
            else:
                current_line = f"{current_line} {word}" if current_line else word
        if current_line:
            lines.append(current_line)
        return '<br>'.join(lines)
    
    df_final['label'] = df_final.apply(wrap_feat, axis=1)

    # Step 2: create treemap
    fig = go.Figure(go.Treemap(
        ids=df_final['id'],
        labels=df_final['label'],
        parents=df_final['parent'],
        values=df_final['value'],
        marker=dict(
            colors=[color_dict.get(f, 'gray') for f in df_final.get('flag', df_final['id'])],
            line=dict(color='black', width=1)
        ),
        textinfo="label+value+percent parent",
        branchvalues='total'
    ))

    # Step 3: title reflects original proportions
    flag_counts = df.groupby(flag, as_index=False)[var].sum().sort_values(var, ascending=False)
    total_count = flag_counts[var].sum()
    flag_summaries = []
    for _, row in flag_counts.iterrows():
        flag_name = row[flag]
        flag_count = row[var]
        flag_frac = flag_count / total_count
        color = color_dict.get(flag_name, 'gray')
        flag_summaries.append(f"<span style='color:{color}'>{flag_name} ({int(flag_count)}, {flag_frac:.0%})</span>")
    title_text = ", ".join(flag_summaries)

    fig.update_layout(
        margin=dict(l=1, r=1, t=20, b=1),
        template='plotly_dark',
        font=dict(color='white', size=11),
        height=560,
        uniformtext=dict(minsize=8, mode='show'),
        title=dict(
            text=title_text,
            x=0.5,
            xanchor='center',
            yanchor='top',
            y=0.99,
            font=dict(size=16)
        )
    )

    config = {'displayModeBar': False, 'responsive': True}
    if save_path:    
        pio.write_html(fig, file=save_path, config=config, include_plotlyjs='cdn')

    return fig.show(config=config)

# Treemap of days spent per country, coloured by continent.
# Countries with fewer than 29 days are nested under a single "Other" tile.
# To export the figure, pass save_path='../attachments/tree_travel.html' instead of None.
create_tree(
    df=df,
    # which columns drive the tiles
    feat='country',
    var='total_days',
    flag='continent',
    # grouping of small entries
    threshold=29,
    threshold_global=True,
    # appearance / output
    color_dict={
        'Asia': 'darkorange',
        'Europe': 'royalblue',
        'America': 'seagreen',
        'Africa': 'crimson',
    },
    save_path=None,
)

ValueError: cannot convert NA to integer

In [4]:
# Column plotted on the map; its maximum also closes the last bin below.
VAR = 'total_days'

# projection_type options: orthographic, equirectangular, mercator, natural earth,
# kavrayskiy7, miller, robinson, eckert4, azimuthal equal area, azimuthal equidistant,
# gnomonic, stereographic, mollweide
# To export the figure, pass save_path='../attachments/map_travel.html' instead of None.
create_map(
    df,
    var=VAR,
    bins=[0, 7, 30, 365, df[VAR].max()],
    code_convention='code3',
    projection_type="orthographic",
    color='royalblue',
    tooltip_mode='calendar',
    save_path=None,
)