## Data explorations with Plotly

#### Udemy course:
- [Data Visualization in Python (Mplib, Seaborn, Plotly, Dash)](https://www.udemy.com/course/data-visualization-in-python/)

In [1]:
import plotly.express as px
import seaborn as sns
#import pandas as pd

In [2]:
# Optional version check; the author's environment reported plotly 5.9.0.
#import plotly
#plotly.__version__
#'5.9.0'

## FMRI dataset

In [3]:
# Load seaborn's example "fmri" dataset and preview its first six rows.
fmri = sns.load_dataset("fmri")
fmri.head(n=6)

Unnamed: 0,subject,timepoint,event,region,signal
0,s13,18,stim,parietal,-0.017552
1,s5,14,stim,parietal,-0.080883
2,s12,18,stim,parietal,-0.081033
3,s11,18,stim,parietal,-0.046134
4,s10,18,stim,parietal,-0.03797
5,s9,18,stim,parietal,-0.103513


In [4]:
# Distinct values of the "region" column.
fmri["region"].unique()

array(['parietal', 'frontal'], dtype=object)

In [5]:
# Plotly Express connects line points in row order, so order the rows by
# timepoint within each subject / event / region group before plotting.
# Rebind the name instead of using inplace=True: the result is the same, but
# the cell no longer mutates a frame that an earlier cell already displayed.
fmri = fmri.sort_values(by=["subject", "event", "region", "timepoint"])

In [6]:
# Signal over time: one coloured line per subject, with one facet row per
# region and one facet column per event.
px.line(
    fmri,
    x="timepoint",
    y="signal",
    color="subject",
    hover_name="subject",
    facet_row="region",
    facet_col="event",
)

## Stock evolution dataset

In [7]:
# Plotly's bundled stocks sample; indexed=True yields a wide frame with the
# date as index and one column per company.
stocks = px.data.stocks(indexed=True)
stocks.head(5)

company,GOOG,AAPL,AMZN,FB,NFLX,MSFT
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-01-01,1.0,1.0,1.0,1.0,1.0,1.0
2018-01-08,1.018172,1.011943,1.061881,0.959968,1.053526,1.015988
2018-01-15,1.032008,1.019771,1.05324,0.970243,1.04986,1.020524
2018-01-22,1.066783,0.980057,1.140676,1.016858,1.307681,1.066561
2018-01-29,1.008773,0.917143,1.163374,1.018357,1.273537,1.040708


In [8]:
# Wide-form input: every company column becomes its own spline-smoothed line.
px.line(data_frame=stocks, line_shape="spline")

In [9]:
# Same wide-form frame rendered as an area chart.
px.area(data_frame=stocks)

## Diamonds dataset

In [10]:
# Load seaborn's example "diamonds" dataset and preview its first six rows.
diamonds = sns.load_dataset("diamonds")
diamonds.head(n=6)

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
0,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43
1,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31
2,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31
3,0.29,Premium,I,VS2,62.4,58.0,334,4.2,4.23,2.63
4,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75
5,0.24,Very Good,J,VVS2,62.8,57.0,336,3.94,3.96,2.48


#### How many diamonds do we have by cut / clarity / color?

In [11]:
# Share of diamonds per clarity grade, with the title centred (x=0.5).
pie_graph = px.pie(data_frame=diamonds, names="clarity")
pie_graph.update_layout(
    title=dict(text="Diamond clarity percentage", x=0.5),
)

In [12]:
# Distribution of diamond prices, with the title centred (x=0.5).
histogram = px.histogram(data_frame=diamonds, x="price", nbins=30)
histogram.update_layout(
    title=dict(text="Diamond price count", x=0.5),
)

#### Does the cut / clarity / color affect the price?

In [13]:
# Price distribution for each cut grade.
px.box(
    data_frame=diamonds,
    x="cut",
    y="price",
)

#### Which variables affect the price?

In [14]:
# Pairwise scatter plots on a 2,000-row random subset (presumably to keep the
# figure light). The fixed random_state makes the sample, and therefore the
# figure, identical on every Restart & Run All.
px.scatter_matrix(
    diamonds.sample(2000, random_state=42),
    dimensions=["carat", "depth", "table", "price"],
    height=1500,
)

In [15]:
# Pairwise Pearson correlations between the numeric columns only;
# cut, color and clarity are non-numeric and are skipped.
correlations = diamonds.corr(method="pearson", numeric_only=True)
correlations

Unnamed: 0,carat,depth,table,price,x,y,z
carat,1.0,0.028224,0.181618,0.921591,0.975094,0.951722,0.953387
depth,0.028224,1.0,-0.295779,-0.010647,-0.025289,-0.029341,0.094924
table,0.181618,-0.295779,1.0,0.127134,0.195344,0.18376,0.150929
price,0.921591,-0.010647,0.127134,1.0,0.884435,0.865421,0.861249
x,0.975094,-0.025289,0.195344,0.884435,1.0,0.974701,0.970772
y,0.951722,-0.029341,0.18376,0.865421,0.974701,1.0,0.952006
z,0.953387,0.094924,0.150929,0.861249,0.970772,0.952006,1.0


In [16]:
# Correlation heatmap. Correlations live in [-1, 1], so pin the colour range
# to that interval and use a diverging scale centred on 0; otherwise the
# default sequential scale with an auto range hides the sign of a correlation.
# text_auto prints each coefficient in its cell (requires plotly >= 5.5).
px.imshow(
    correlations,
    zmin=-1,
    zmax=1,
    color_continuous_scale="RdBu_r",
    text_auto=".2f",
)

#### Explore the relationship between carat, clarity, cut, color and price

In [17]:
# Price against carat on log-log axes, coloured by clarity; the points are
# semi-transparent so overlapping markers remain visible.
px.scatter(
    diamonds,
    x="carat",
    y="price",
    color="clarity",
    opacity=0.6,
    log_x=True,
    log_y=True,
)