In [6]:
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd
import numpy as np

In [11]:
revenues = pd.read_csv('revenue_data.csv')

In [12]:
df_tech = revenues[revenues['Industry']=='Tech']
df_prof_serve = revenues[revenues['Industry']=='Professional Services']
df_retail = revenues[revenues['Industry']=='Pharmaceuticals']
df_oil = revenues[revenues['Industry']=='Oil']

### Revenue box subplots
You are now a regular contractor with the New York Stock Exchange, who have asked you to revisit the box plots by industry you created previously.

They are creating some visualizations for a specific presentation and have found that the plot you created before is too wide. They are also only interested in 4 specific industries.

The make_subplots() function has been imported for you already.



In [13]:
# Set up the subplots grid
fig = make_subplots(rows=2, cols=2, 
                    # Set the subplot titles
                    subplot_titles=['Tech', 'Professional Services', 'Pharmaceuticals', 'Oil'])

# Add the Tech trace
fig.add_trace(go.Box(x=df_tech.Revenue, name='', showlegend=False), row=1, col=1)
# Add the Professional Services trace
fig.add_trace(go.Box(x=df_prof_serve.Revenue, name='', showlegend=False), row=1, col=2)
# Add the Retail trace
fig.add_trace(go.Box(x=df_retail.Revenue, name='', showlegend=False), row=2, col=1)
# Add the Oil trace
fig.add_trace(go.Box(x=df_oil.Revenue, name='', showlegend=False), row=2, col=2)

# Add a title (and show)
fig.update_layout({'title': {'text': 'Box plots of company revenues', 
                   'x': 0.5, 
                   'y': 0.9}})
fig.show()

### Revenue histogram subplots
The revenue histogram with colors by industry (with stacked bars) you created for the The New York Stock exchange firm was enlightening for which industries tended to be in which area of the histogram.

However, the firm wishes to understand the distribution of each industry without having to hover to see. For this analysis, the previous histogram has too much in a single plot, but they don't want multiple plots. How can you help solve this conundrum?

Your task is to create a histogram of company revenues by industry as a stacked subplot and a shared x-axis to allow meaningful comparison of industries.

You have a revenues DataFrame loaded for you.



In [14]:
# Create the subplots
fig = make_subplots(rows=3, cols=1, shared_xaxes=True)

# Loop through the industries
row_num = 1
for industry in ['Tech', 'Retail', 'Professional Services']:
    df = revenues[revenues.Industry == industry]
    # Add a histogram using subsetted df
    fig.add_trace(go.Histogram(x=df['Revenue'], name=industry),
    # Position the trace
    row=row_num, col=1)
    row_num +=1

# Show the plot
fig.show()

### Species on different islands
The Antarctic research scientists are back with another brief. They want to be able to visualize how their data collection counts differ between species and islands.

Specifically, they want to easily compare islands based on the count of different species of penguins they recorded there.

You have the perfect plot - you will layer several bar charts together for easy comparison!

You have been provided a penguins_grouped DataFrame that has the count of samples for each species at each island as well as an islands list of the different islands where research was undertaken.

In [15]:
penguins = pd.read_csv('penguins.csv')
penguins.head()

Unnamed: 0.1,Unnamed: 0,studyName,Sample Number,Species,Region,Island,Stage,Individual ID,Clutch Completion,Date Egg,Culmen Length (mm),Culmen Depth (mm),Flipper Length (mm),Body Mass (g),Sex,Delta 15 N (o/oo),Delta 13 C (o/oo),Comments
0,1,PAL0708,1,Adelie Penguin (Pygoscelis adeliae),Anvers,Torgersen,"Adult, 1 Egg Stage",N1A1,Yes,2007-11-11,39.1,18.7,181.0,3750.0,MALE,,,Not enough blood for isotopes.
1,2,PAL0708,2,Adelie Penguin (Pygoscelis adeliae),Anvers,Torgersen,"Adult, 1 Egg Stage",N1A2,Yes,2007-11-11,39.5,17.4,186.0,3800.0,FEMALE,8.94956,-24.69454,
2,3,PAL0708,3,Adelie Penguin (Pygoscelis adeliae),Anvers,Torgersen,"Adult, 1 Egg Stage",N2A1,Yes,2007-11-16,40.3,18.0,195.0,3250.0,FEMALE,8.36821,-25.33302,
3,4,PAL0708,4,Adelie Penguin (Pygoscelis adeliae),Anvers,Torgersen,"Adult, 1 Egg Stage",N2A2,Yes,2007-11-16,,,,,,,,Adult not sampled.
4,5,PAL0708,5,Adelie Penguin (Pygoscelis adeliae),Anvers,Torgersen,"Adult, 1 Egg Stage",N3A1,Yes,2007-11-16,36.7,19.3,193.0,3450.0,FEMALE,8.76651,-25.32426,


In [34]:
penguins_grouped = penguins.groupby(['Island','Species']).count()

In [52]:
penguins_grouped.columns

Index(['Island', 'Species', 'count', 'studyName', 'Sample Number', 'Region',
       'Stage', 'Individual ID', 'Clutch Completion', 'Date Egg',
       'Culmen Length (mm)', 'Culmen Depth (mm)', 'Flipper Length (mm)',
       'Body Mass (g)', 'Sex', 'Delta 15 N (o/oo)', 'Delta 13 C (o/oo)',
       'Comments'],
      dtype='object')

In [53]:
# Create the base figure
fig = go.Figure()

# Loop through the species
for species in ['Adelie Penguin (Pygoscelis adeliae)', 'Gentoo penguin (Pygoscelis papua)', 'Chinstrap penguin (Pygoscelis antarctica)']:
  # Add a bar chart trace
  fig.add_trace(go.Bar(x=penguins_grouped['Island'],
    # Set the appropriate y subset and name
    y=penguins_grouped[penguins_grouped.Species == species]['count'],
    name=species))
# Show the figure
fig.show()

### Monthly temperatures layered
The Australian Bureau Of Meteorology has tasked you with helping them build some nice interactive plots for their website.

They want to look at both the daily temperature from January to July this year and smooth out all the data points with a nice trend line of the monthly average temperature.

This would be an excellent opportunity to layer two plots together to achieve the desired outcome.

You have been provided a temp_syd DataFrame that contains the daily (max) temperature from January to July in 2020. You also have a temp_syd_avg DataFrame containing each month's average daily (max) temperature.



In [55]:
temp_syd = pd.read_csv('sydney_temps.csv')

In [61]:
temp_syd_avg = temp_syd.groupby(['Date']).mean().reset_index()

In [67]:
temp_syd_avg.rename(columns={'Temp':'Average'}, inplace=True)

In [86]:
temp_syd_avg.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12 entries, 0 to 11
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype    
---  ------   --------------  -----    
 0   Date     12 non-null     period[M]
 1   Average  12 non-null     float64  
dtypes: float64(1), period[M](1)
memory usage: 320.0 bytes


In [98]:
temp_syd_avg = temp_syd.groupby(pd.PeriodIndex(temp_syd['Date'], freq="M"))['Temp'].mean().reset_index()
temp_syd_avg.rename(columns={'Temp':'Average'}, inplace=True)

In [99]:
temp_syd_avg['Date'] = temp_syd_avg['Date'].astype(str) + '-1'

In [100]:
temp_syd_avg['Date']

0     2020-01-1
1     2020-02-1
2     2020-03-1
3     2020-04-1
4     2020-05-1
5     2020-06-1
6     2020-07-1
7     2020-08-1
8     2020-09-1
9     2020-10-1
10    2020-11-1
11    2020-12-1
Name: Date, dtype: object

In [101]:
# Create the base figure
fig = go.Figure()

# Add the bar graph of daily temperatures
fig.add_trace(
  go.Bar(x=temp_syd['Date'], y=temp_syd['Temp'], name='Daily Max Temperature'))

# Add the monthly average line graph
fig.add_trace(go.Scatter(x=temp_syd_avg['Date'], y=temp_syd_avg['Average'], name='Average Monthly Temperature'))

# Show the plot
fig.show()

### Time buttons on our rainfall graph
The local news station is wanting to update the graphics in the weather section of their website. They have contacted you to assist in jazzing up the old images and tables they have.

They have requested a line chart, but with the ability to filter the data for the last 4 weeks (4WTD), last 48 hours (48HR) and the year to date (YTD).

In this exercise, you will help the news station by building their line chart with the requested buttons.

You have a rain DataFrame available that contains the necessary data.



In [102]:
rain = pd.read_csv('rain.csv')

In [114]:
# Create the basic line chart
fig = px.line(data_frame=rain, x='Date', y='Rainfall', 
              title="Rainfall (mm)")

# Create the buttons
date_buttons = [
{'count': 28, 'label': '4WTD', 'step': "day", 'stepmode': "todate"},
{'count': 48, 'label': '48HR', 'step': "hour", 'stepmode': "todate"},
{'count': 1, 'label': 'YTD', 'step': "year", 'stepmode': "todate"}]


# Add the buttons and show
fig.update_layout({'xaxis': {'rangeselector': {'buttons': date_buttons}}})
fig.show()

### Finance line chart with custom time buttons
You have been engaged by an Excel-savvy finance trading company to help them jazz up their data visualization capabilities. Safe to say, Excel graphics aren't cutting it for them!

The fund is particularly interested in the electric vehicle company Tesla and how it has performed this year and wants a tool that can help them zoom to view key timeframes.

In this exercise, you will help the trading company by visualizing the opening stock price of Tesla over 2020 and create the following date-filter buttons:

1WTD = The previous week (7 days to date)
6MTD = The previous 6 months week (6 months to date)
YTD = The current year to date
You have a tesla DataFrame available that contains the necessary data.



In [107]:
tesla = pd.read_csv('AAPL.csv')

In [108]:
tesla.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2019-08-30,210.160004,210.449997,207.199997,208.740005,206.689255,21143400
1,2019-09-03,206.429993,206.979996,204.220001,205.699997,203.679108,20023000
2,2019-09-04,208.389999,209.479996,207.320007,209.190002,207.134857,19188100
3,2019-09-05,212.0,213.970001,211.509995,213.279999,211.184647,23913700
4,2019-09-06,214.050003,214.419998,212.509995,213.259995,211.164841,19362300


In [109]:
# Create the basic line chart
fig = px.line(data_frame=tesla, x='Date', y='Open', title="Tesla Opening Stock Prices")

# Create the financial buttons
fin_buttons = [
  {'count': 7, 'label': "1WTD", 'step': 'day', 'stepmode': 'todate'},
  {'count': 6, 'label': "6MTD", 'step': 'month', 'stepmode': 'todate'},
  {'count': 1, 'label': "YTD", 'step': 'year', 'stepmode': 'todate'}
]

# Create the date range buttons & show the plot
fig.update_layout({'xaxis': {'rangeselector': {'buttons': fin_buttons}}})
fig.show()