##  Installing Plotly

<p>Use the Conda install plotly command. Visit the following link<Br>
    <a href="https://anaconda.org/plotly/plotly">https://anaconda.org/plotly/plotly</a></p>

## 1. Introduction to Plotly

<p>
    1. Until now we did visualisations using Matplotlib, Seaborn and Pandas. All of them produce
    static image files.<br><br>
    2. Plotly is company based out in Canada famous for it's products like Plotly and Dash<br><br>
    3. Plotly creates interactive visualisations in the form of HTML files<br><br>
    4. Drawback- can't work with a live data source<br><br>
    5. Dash is used to create live data based dashboards.
</p>

In [3]:
import numpy as np
import pandas as pd
import plotly.offline as pyo
import plotly.graph_objs as go

In [6]:
match=pd.read_csv('matches.csv')
delivery=pd.read_csv('deliveries.csv')

ipl=delivery.merge(match,left_on='match_id',right_on='id')
ipl.head()

Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batsman,non_striker,bowler,is_super_over,...,result,dl_applied,winner,win_by_runs,win_by_wickets,player_of_match,venue,umpire1,umpire2,umpire3
0,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,1,DA Warner,S Dhawan,TS Mills,0,...,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,
1,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,2,DA Warner,S Dhawan,TS Mills,0,...,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,
2,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,3,DA Warner,S Dhawan,TS Mills,0,...,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,
3,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,4,DA Warner,S Dhawan,TS Mills,0,...,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,
4,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,5,DA Warner,S Dhawan,TS Mills,0,...,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,


## 1. Scatter Plots

<img src="https://www.mathsisfun.com/data/images/scatter-ice-cream1.svg"/>

In [9]:
# Scatter plots are drawn between to continous variables
# Problem :- We are going to draw a scatter plot between Batsman Avg(X axis) and
# Batsman Strike Rate(Y axis) of the top 50 batsman in IPL(All time)


In [7]:
# Avg vs SR graph of Top 50 batsman(in terms of total runs)

# Fetching a new dataframe with Top 50 batsman
top50=ipl.groupby('batsman')['batsman_runs'].sum().sort_values(ascending=False).head(50).index.tolist()
new_ipl=ipl[ipl['batsman'].isin(top50)]


In [8]:
# Calculating SR
# SR=[(number of runs scored)/(number of balls played)]*100
runs=new_ipl.groupby('batsman')['batsman_runs'].sum()
balls=new_ipl.groupby('batsman')['batsman_runs'].count()

sr=(runs/balls)*100

sr=sr.reset_index()
sr

Unnamed: 0,batsman,batsman_runs
0,AB de Villiers,148.740343
1,AC Gilchrist,133.054662
2,AD Russell,179.950187
3,AJ Finch,127.113703
4,AM Rahane,119.751166
5,AT Rayudu,124.058187
6,BB McCullum,127.332746
7,CH Gayle,145.64037
8,DA Miller,136.264535
9,DA Warner,139.523249


In [9]:
# Calculating Avg
# Avg=(Total number of Runs)/(Number of outs)

# Calculating number of outs for top 50 batsman
out=ipl[ipl['player_dismissed'].isin(top50)]

nouts=out['player_dismissed'].value_counts()

avg=runs/nouts

avg=avg.reset_index()
avg.rename(columns={'index':'batsman',0:'avg'},inplace=True)

avg=avg.merge(sr,on='batsman')
avg

Unnamed: 0,batsman,avg,batsman_runs
0,AB de Villiers,40.254545,148.740343
1,AC Gilchrist,27.223684,133.054662
2,AD Russell,34.404762,179.950187
3,AJ Finch,26.029851,127.113703
4,AM Rahane,33.189655,119.751166
5,AT Rayudu,28.921739,124.058187
6,BB McCullum,27.817308,127.332746
7,CH Gayle,41.834862,145.64037
8,DA Miller,34.090909,136.264535
9,DA Warner,43.1,139.523249


In [21]:
# Plot Scatter Plot here
trace=go.Scatter(x=avg['avg'],y=avg['batsman_runs'],mode='markers',text=avg['batsman'],marker={'color':'#00a65a', 'size':12})
data=[trace]
layout=go.Layout(title='Batsman Avg vs Strike rate',xaxis={'title':'Batsman Avg'},yaxis={'title':'Batsman Strike rate'})
fig=go.Figure(data=data,layout=layout)
pyo.plot(fig,filename='Myfile.html')

'Myfile.html'

## 2. Line Chart

<p>It's an extension of Scatter plot. Usually used to show a time series data</p>
<img src='https://apexcharts.com/wp-content/uploads/2018/01/basic-line-chart.svg'/>

In [10]:
# Year by Year batsman performance

single=ipl[ipl['batsman']=='V Kohli']
performance=single.groupby('season')['batsman_runs'].sum().reset_index()
performance

single1=ipl[ipl['batsman']=='MS Dhoni']
performance1=single1.groupby('season')['batsman_runs'].sum().reset_index()
performance1


Unnamed: 0,season,batsman_runs
0,2008,414
1,2009,332
2,2010,287
3,2011,392
4,2012,357
5,2013,461
6,2014,371
7,2015,372
8,2016,284
9,2017,290


In [36]:
# Plot Line Chart here
trace=go.Scatter(x=performance['season'],
                 y=performance['batsman_runs'],
                 mode='lines+markers',
                 marker={'color':'#00a65a','size':12},name='V Kholi')

trace1=go.Scatter(x=performance1['season'],
                 y=performance1['batsman_runs'],
                 mode='lines+markers',
                 marker={'size':12},name='MS Dhoni')

data=[trace,trace1]
layout=go.Layout(title='Year by year performance',
                 xaxis={'title':'season'},
                 yaxis={'title':'Total runs'})

fig=go.Figure(data=data,layout=layout)

pyo.plot(fig)


'temp-plot.html'

In [37]:
# Multiple Line Charts

def batsman_comp(*name):
    data=[]
    for i in name:
        single=ipl[ipl['batsman']==i]
        performance=single.groupby('season')['batsman_runs'].sum().reset_index()

        trace=go.Scatter(x=performance['season'],y=performance['batsman_runs']
                         ,mode='lines + markers',name=i)
        
        data.append(trace)
    
    layout=go.Layout(title='Batsman Record Comparator',
                xaxis={'title':'Season'},
                yaxis={'title':'Runs'})

    fig=go.Figure(data=data,layout=layout)

    pyo.plot(fig,filename='year_by_year')
        
        

In [38]:
batsman_comp('V Kohli', 'RG Sharma','DA Warner','MS Dhoni')


Your filename `year_by_year` didn't end with .html. Adding .html to the end of your file.



## 3. Bar Plot

<p>Used to show relation between one categorical and 1 numerical data</p>
<img src="https://images.ctfassets.net/fevtq3bap7tj/5FSJrJeDIIGAmGCsGcQ8S4/e2fc867a487614b47f72104a36fbcf7f/simple-column.png"/>

In [12]:
top10=ipl.groupby('batsman')['batsman_runs'].sum().sort_values(ascending=False).head(10).index.tolist()
top10_df=ipl[ipl['batsman'].isin(top10)]

In [13]:
top10_score=top10_df.groupby('batsman')['batsman_runs'].sum().reset_index()
top10_score

Unnamed: 0,batsman,batsman_runs
0,AB de Villiers,4428
1,CH Gayle,4560
2,DA Warner,4741
3,G Gambhir,4223
4,MS Dhoni,4477
5,RG Sharma,4914
6,RV Uthappa,4446
7,S Dhawan,4632
8,SK Raina,5415
9,V Kohli,5434


In [43]:
# Plot Bar Graph
trace=go.Bar(x=top10_score['batsman'],y=top10_score['batsman_runs'])

data=[trace]

layout=go.Layout(title='Top 10 IPL Batsman',
                xaxis={'title':'Batsmans'},
                yaxis={'title':'Batsman Runs'})

fig=go.Figure(data=data,layout=layout)
pyo.plot(fig)


'temp-plot.html'

### There are 2 types of Bar Graphs
<p>
    1. Nested Bar Graph<Br>
    2. Stacked Bar Graph<br>
    3. Overlayed Bar Graph
</p>

In [14]:
iw=top10_df.groupby(['batsman','inning'])['batsman_runs'].sum().reset_index()
mask=iw['inning']==1
mask2=iw['inning']==2
one=iw[mask]
two=iw[mask2]


one.rename(columns={'batsman_runs':'1st Innings'},inplace=True)
two.rename(columns={'batsman_runs':'2nd Innings'},inplace=True)

final=one.merge(two,on='batsman')[['batsman','1st Innings','2nd Innings']]

final



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,batsman,1st Innings,2nd Innings
0,AB de Villiers,2616,1799
1,CH Gayle,2632,1903
2,DA Warner,2571,2170
3,G Gambhir,1759,2464
4,MS Dhoni,2632,1845
5,RG Sharma,2837,2077
6,RV Uthappa,1822,2622
7,S Dhawan,2690,1942
8,SK Raina,3108,2299
9,V Kohli,2876,2553


In [52]:
# Plot Bar Chart here
trace1=go.Bar(x=final['batsman'],y=final['1st Innings'],name='1st Innings',marker={'color':'#00a65a'})

trace2=go.Bar(x=final['batsman'],y=final['2nd Innings'],name='2nd Innings',marker={'color':'#a6a65a'})

data=[trace1,trace2]

layout=go.Layout(title='Inning Wise Scores',
                xaxis={'title':'Batsman'},
                yaxis={'title':'Runs'},
                barmode='overlay')

fig=go.Figure(data=data,layout=layout)
pyo.plot(fig,filename='Overlay bar graph.html')

'Overlay bar graph.html'

In [53]:
# Plot Bar Chart here
trace1=go.Bar(x=final['batsman'],y=final['1st Innings'],name='1st Innings',marker={'color':'#00a65a'})

trace2=go.Bar(x=final['batsman'],y=final['2nd Innings'],name='2nd Innings',marker={'color':'#a6a65a'})

data=[trace1,trace2]

layout=go.Layout(title='Inning Wise Scores',
                xaxis={'title':'Batsman'},
                yaxis={'title':'Runs'},
                barmode='stack')

fig=go.Figure(data=data,layout=layout)
pyo.plot(fig,filename='Stack bar graph.html')

'Stack bar graph.html'

In [54]:
# Plot Bar Chart here
trace1=go.Bar(x=final['batsman'],y=final['1st Innings'],name='1st Innings',marker={'color':'#00a65a'})

trace2=go.Bar(x=final['batsman'],y=final['2nd Innings'],name='2nd Innings',marker={'color':'#a6a65a'})

data=[trace1,trace2]

layout=go.Layout(title='Inning Wise Scores',
                xaxis={'title':'Batsman'},
                yaxis={'title':'Runs'})

fig=go.Figure(data=data,layout=layout)
pyo.plot(fig,filename='Nested bar graph.html')

'Nested bar graph.html'

## 4. Bubble Plot
<p>Again an extension of Scatter plot. with some additional informations</p>
<img src="https://www.data-to-viz.com/graph/bubble_files/figure-html/unnamed-chunk-1-1.png"/>

In [21]:
# Plot Bubble chart here
new_ipl=new_ipl[new_ipl['batsman_runs']==6]

six=new_ipl.groupby('batsman')['batsman_runs'].count().reset_index()

x=avg.merge(six,on='batsman')

trace=go.Scatter(x=x['avg'],y=x['batsman_runs_x'],mode='markers',marker={'size':x['batsman_runs_y']})

data=[trace]

layout=go.Layout(title='Bubble Chart',
                xaxis={'title':'Average'},
                yaxis={'title':'SR'})

fig=go.Figure(data=data,layout=layout)

pyo.plot(fig)

'temp-plot.html'

## 5. Box Plot

<p>A box and whisker plot—also called a box plot—displays the five-number summary of a set of data.</p>
<img src="https://miro.medium.com/max/18000/1*2c21SkzJMf3frPXPAR_gZA.png"/>

In [24]:
match_agg=delivery.groupby(['match_id'])['total_runs'].sum().reset_index()
season_wise=match_agg.merge(match,left_on='match_id',right_on='id')[['match_id','total_runs','season']]
season_wise

Unnamed: 0,match_id,total_runs,season
0,1,379,2017
1,2,371,2017
2,3,367,2017
3,4,327,2017
4,5,299,2017
...,...,...,...
751,11347,280,2019
752,11412,276,2019
753,11413,341,2019
754,11414,317,2019


In [29]:
# Plot Box Plot here

trace1=go.Box(x=season_wise[season_wise['season']==2017]['total_runs'],name='2017',marker={'color':'#00a65a'})
trace2=go.Box(x=season_wise[season_wise['season']==2008]['total_runs'],name='2008')

data=[trace1,trace2]

layout=go.Layout(title='Total Score Analysis',
                xaxis={'title':'Total Score'})

fig=go.Figure(data=data,layout=layout)

pyo.plot(fig)

'temp-plot.html'

### 6. Histograms

<p>A histogram is a plot that lets you discover, and show, the underlying frequency distribution (shape) of a set of continuous data.</p>

<img src="https://www.math-only-math.com/images/histogram-problems.png"/>

In [30]:
x=delivery.groupby('batsman')['batsman_runs'].count()>150
x=x[x].index.tolist()

new=delivery[delivery['batsman'].isin(x)]


runs=new.groupby('batsman')['batsman_runs'].sum()
balls=new.groupby('batsman')['batsman_runs'].count()

sr=(runs/balls)*100

sr=sr.reset_index()
sr

Unnamed: 0,batsman,batsman_runs
0,A Ashish Reddy,142.857143
1,A Mishra,89.756098
2,A Symonds,124.711908
3,AA Jhunjhunwala,99.541284
4,AB Agarkar,111.875000
...,...,...
169,Y Nagar,105.166052
170,Y Venugopal Rao,113.872832
171,YK Pathan,138.860326
172,YV Takawale,104.918033


In [39]:
# Plot Histogram

trace=go.Histogram(x=sr['batsman_runs'],xbins={'size':5,'start':50,'end':100},name='Strike Rate variations')

data=[trace]

layout=go.Layout(title='Strike Rate Analysis')

fig=go.Figure(data=data,layout=layout)

pyo.plot(fig)


'temp-plot.html'

In [40]:
# Plot Histogram

trace=go.Histogram(x=sr['batsman_runs'],xbins={'size':2,'start':50,'end':100},name='Strike Rate variations')

data=[trace]

layout=go.Layout(title='Strike Rate Analysis')

fig=go.Figure(data=data,layout=layout)

pyo.plot(fig)


'temp-plot.html'

### 7. Distplots

<p></p>
<img src="https://plot.ly/~PythonPlotBot/10/customized-distplot.png"/>

In [33]:
# Plot Distplot

import plotly.figure_factory as ff

hist_data=[avg['avg'], avg['batsman_runs']]

group_labels=['Average', 'Strike Rate']

fig=ff.create_distplot(hist_data,group_labels)

pyo.plot(fig)


'temp-plot.html'

### 8. Heatmaps

<p>A heat map is a graphical representation of data where the individual values contained in a matrix are represented as colors.</p>

<img src="https://seaborn.pydata.org/_images/heatmap_annotation.png"/>

In [34]:
six=delivery[delivery['batsman_runs']==6]
six=six.groupby(['batting_team','over'])['batsman_runs'].count().reset_index()

six

Unnamed: 0,batting_team,over,batsman_runs
0,Chennai Super Kings,1,9
1,Chennai Super Kings,2,21
2,Chennai Super Kings,3,49
3,Chennai Super Kings,4,45
4,Chennai Super Kings,5,53
...,...,...,...
290,Sunrisers Hyderabad,16,31
291,Sunrisers Hyderabad,17,25
292,Sunrisers Hyderabad,18,49
293,Sunrisers Hyderabad,19,58


In [35]:
# Plot Heatmap

trace=go.Heatmap(x=six['batting_team'],y=six['over'],z=six['batsman_runs'])

data=[trace]

layout=go.Layout(title='Six Heatmap')

fig=go.Figure(data=data,layout=layout)

pyo.plot(fig)

'temp-plot.html'

In [36]:
# Side by Side Heatmap

dots=delivery[delivery['batsman_runs']==0]
dots=dots.groupby(['batting_team','over'])['batsman_runs'].count().reset_index()

from plotly import tools

trace1=go.Heatmap(x=six['batting_team'],y=six['over'],
                 z=six['batsman_runs'].values.tolist())

trace2=go.Heatmap(x=dots['batting_team'],y=dots['over'],
                 z=dots['batsman_runs'].values.tolist())


fig=tools.make_subplots(rows=1,cols=2,subplot_titles=["6's","0's"], shared_yaxes=True)

fig.append_trace(trace1,1,1)
fig.append_trace(trace2,1,2)

pyo.plot(fig)



plotly.tools.make_subplots is deprecated, please use plotly.subplots.make_subplots instead



'temp-plot.html'