In [3]:
import pyasx.data.companies
import pandas as pd
import plotly.express as px
import nbformat
import plotly

In [4]:
def treemap_df(df: pd.DataFrame, path: list, value_column: str, top_n: int = 100, ignore_negative: bool = True):
    """
    Process the DataFrame by maintaining the top N rows based on the value_column
    and grouping the remaining rows by the values in the path, combining them into
    one row with the label 'other' in place of the last column referenced in the path.

    Parameters:
    - df: DataFrame
    - path: list of columns
    - value_column: string, the column based on which top N rows are maintained
    - top_n: int, the number of top rows to keep based on value_column
    - ignore_negative: bool, if True, negative values in value_column will be ignored

    Returns:
    - processed_df: DataFrame, the processed DataFrame
    """
    # Input validation
    if not all(col in df.columns for col in path + [value_column]):
        raise ValueError("Columns not present in the DataFrame.")

    # Create a copy of the specified columns
    selected_df = df.filter(items=path + [value_column]).copy()

    # Drop NaN values from the value_column
    selected_df = selected_df.dropna(subset=[value_column])

    # Handle negative values based on the ignore_negative parameter
    if ignore_negative:
        selected_df = selected_df[selected_df[value_column] > 0]

    # Sort the DataFrame by value_column
    selected_df.sort_values([value_column], ascending=[False], inplace=True)

    # Identify the top N rows based on value_column
    top_n_df = selected_df.head(top_n)

    # Identify the 'other' rows
    other_df = selected_df.iloc[top_n:]

    # Set the value 'other' in the last column specified in the path for all rows in 'other_df'
    other_df.loc[:, path[-1]] = 'other'

    # Combine 'other' rows into one row with 'other' label
    other_df = other_df.groupby(path, as_index=False).agg({value_column: 'sum'})

    # Concatenate the top N rows and the 'other' rows
    processed_df = pd.concat([top_n_df, other_df], ignore_index=True)

    return processed_df


In [None]:
def generate_treemap(
        df: pd.DataFrame,
        path: list,
        value_column: str,
        top_n: int = 100,
        ignore_negative: bool = True):

    # Get data to map
    data = treemap_df(
        df = df,
        path = path,
        value_column = value_column,
        top_n = top_n,
        ignore_negative = ignore_negative)

    fig = px.treemap(data, path=path, values=value_column)

    return fig

In [5]:
# Get latest asx companies
all_asx_companies = pyasx.data.companies.get_listed_companies()

# Convert all_asx_companies to dataframe
all_asx_companies_df = pd.DataFrame(all_asx_companies)
gics = all_asx_companies_df['gics_industry'].copy()
all_asx_companies_df['gics_industry'] = all_asx_companies_df['listing_date']
all_asx_companies_df['listing_date'] = gics
all_asx_companies_df = all_asx_companies_df.drop('market_cap', axis=1)

In [6]:
# Add in market cap values
for ticker in all_asx_companies_df['ticker']:
    try:
        market_cap = pyasx.data.companies.get_company_info(ticker)['primary_share']['market_cap']
    except:
        market_cap = None

    all_asx_companies_df.loc[all_asx_companies_df['ticker'] == ticker, 'market_cap'] = market_cap


In [11]:
tree_fig = generate_treemap(
    df = all_asx_companies_df,
    path = ['gics_industry', 'ticker'],
    value_column = 'market_cap',
    top_n = 100,
    ignore_negative = True)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [12]:
# Show the plot
tree_fig.show()