In [1]:
import sys
sys.path.append('../')

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

from utils.plot_utils import export_plot


In [2]:
# Load every raw table from its gzip-compressed parquet file in a single pass.
# The paths are listed in the same order as the target names on the left-hand side.
competitors, parents, partners, suppliers, info, address = (
    pd.read_parquet(raw_path)
    for raw_path in (
        '../data/raw/competitors.parquet.gzip',
        '../data/raw/parents.parquet.gzip',
        '../data/raw/partners.parquet.gzip',
        '../data/raw/suppliers.parquet.gzip',
        '../data/raw/entity_info.parquet.gzip',
        '../data/raw/addresses.parquet.gzip',
    )
)

## Sector plot

In [3]:
# Per-sector non-null counts of every column; the `code` count is used as the sector size.
sector_totals = info.groupby(['sector']).count().reset_index()
df = sector_totals.sort_values('code', ascending=False)

# Sectors with fewer than 7000 entries are relabelled 'Other' before plotting.
is_small_sector = df['code'] < 7000
df.loc[is_small_sector, 'sector'] = 'Other'

fig = px.pie(
    df,
    names='sector',
    values='code',
    title='Sector distribution',
    width=700,
    height=500,
)

# Kept as the last expression: its return value is what the cell displays.
# NOTE(review): export_plot comes from utils.plot_utils; presumably it writes the
# figure once per listed format - confirm against the helper.
export_plot(fig, 'sector_distribution', ['html', 'png', 'svg'])

'Exported'

## Bar plot of start years in supplier relationships

In [24]:
# Parse the start dates; the column is overwritten so later cells also see datetimes.
suppliers['start_date'] = pd.to_datetime(suppliers['start_date'])

# Number of supplier relations per start year, ordered by year.
# pandas >= 2.0 names the value_counts() result 'count' and its index 'start_date',
# so a plain reset_index() no longer produces an 'index' column there. Naming both
# columns explicitly keeps the ('index' = year, 'start_date' = count) layout that the
# plotting cell expects, on every pandas version.
supplier_relation_start_counts = (
    suppliers['start_date'].dt.year
    .value_counts()
    .rename_axis('index')
    .reset_index(name='start_date')
    .sort_values('index')
)

In [25]:
# Bar chart of supplier relations per start year
# (column 'index' holds the year, column 'start_date' holds the count).
fig = px.bar(
    supplier_relation_start_counts,
    x='index',
    y='start_date',
    width=700,
    height=500,
)

# Replace the raw column names on the axes with readable labels.
supplier_layout = dict(
    title='Number of supplier relations per year',
    xaxis_title='Year',
    yaxis_title='Number of supplier relations',
)
fig.update_layout(**supplier_layout)

# Last expression of the cell: its return value is the cell output.
export_plot(fig, 'supplier_relation_start_counts', ['html', 'png', 'svg'])

'Exported'

## Bar plot of start years in partner relationships

In [26]:
# Parse the start dates; the column is overwritten so later cells also see datetimes.
partners['start_date'] = pd.to_datetime(partners['start_date'])

# Number of partner relations per start year, ordered by year.
# pandas >= 2.0 names the value_counts() result 'count' and its index 'start_date',
# so a plain reset_index() no longer produces an 'index' column there. Naming both
# columns explicitly keeps the ('index' = year, 'start_date' = count) layout that the
# plotting cell expects, on every pandas version.
partner_relation_start_counts = (
    partners['start_date'].dt.year
    .value_counts()
    .rename_axis('index')
    .reset_index(name='start_date')
    .sort_values('index')
)

In [27]:
# Bar chart of partner relations per start year
# (column 'index' holds the year, column 'start_date' holds the count).
fig = px.bar(
    partner_relation_start_counts,
    x='index',
    y='start_date',
    width=700,
    height=500,
)

# Replace the raw column names on the axes with readable labels.
partner_layout = dict(
    title='Number of partner relations per year',
    xaxis_title='Year',
    yaxis_title='Number of partner relations',
)
fig.update_layout(**partner_layout)

# Last expression of the cell: its return value is the cell output.
export_plot(fig, 'partner_relation_start_counts', ['html', 'png', 'svg'])

'Exported'

## Bar plot of start years in competitor relationships

In [28]:
# Parse the start dates; the column is overwritten so later cells also see datetimes.
competitors['start_date'] = pd.to_datetime(competitors['start_date'])

# Number of competitor relations per start year, ordered by year.
# pandas >= 2.0 names the value_counts() result 'count' and its index 'start_date',
# so a plain reset_index() no longer produces an 'index' column there. Naming both
# columns explicitly keeps the ('index' = year, 'start_date' = count) layout that the
# plotting cell expects, on every pandas version.
competitor_relation_start_counts = (
    competitors['start_date'].dt.year
    .value_counts()
    .rename_axis('index')
    .reset_index(name='start_date')
    .sort_values('index')
)

In [29]:
# Bar chart of competitor relations per start year
# (column 'index' holds the year, column 'start_date' holds the count).
fig = px.bar(competitor_relation_start_counts, x='index', y='start_date', height=500, width=700)

# Replace the raw column names on the axes with readable labels.
# The title uses the singular "competitor" so it agrees with the y-axis label below
# and with the wording of the supplier and partner charts.
fig.update_layout(
    title='Number of competitor relations per year',
    xaxis_title='Year',
    yaxis_title='Number of competitor relations',
)

# Last expression of the cell: its return value is the cell output.
export_plot(fig, 'competitor_relation_start_counts', ['html', 'png', 'svg'])

'Exported'

## Plot of the number of companies per country

In [6]:
# Non-null count of every address column per country, largest `name` count first.
# The index is reset after sorting, so row 0 is the country with the most entries.
country_counts = address.groupby(['country']).count()
availability = country_counts.sort_values(by='name', ascending=False).reset_index()

In [7]:
# Show the per-country counts as the cell output (one row per country, sorted by `name` count).
availability

Unnamed: 0,country,id,name,city_state_postal,location_street1
0,USA,48514,48514,43972,42607
1,CHN,19894,19894,14127,12776
2,JPN,11781,11781,8659,8318
3,GBR,11312,11312,10177,9979
4,DEU,7357,7357,6580,6496
...,...,...,...,...,...
204,GUF,1,1,1,1
205,ERI,1,1,0,0
206,ASM,1,1,0,0
207,VAT,1,1,1,0


In [14]:
# World map coloured by the number of address records with a company name per country.
# The `country` column holds three-letter codes (USA, CHN, JPN, ...), which matches
# plotly's default ISO-3 location mode, so no `locationmode` argument is needed.
fig = px.choropleth(
    availability,
    locations="country",
    color="name",  # non-null `name` count per country, i.e. number of companies
    hover_name="country",  # column shown as the hover title
    color_continuous_scale=px.colors.sequential.Plasma,
    height=500, width=1000,
)

fig.update_layout(
    coloraxis_colorbar=dict(
        # The previous title ("Number of Bills per Cell") was left over from an
        # unrelated example; the colour scale shows company counts.
        title="Number of companies",
        len=1,
    ),
    title_text='Availability of company data based on country',
)

# Last expression of the cell: its return value is the cell output.
export_plot(fig, 'availability', ['html', 'png', 'svg'])

'Exported'