In [1]:
import pandas as pd
import numpy as np
import plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode
from plotly import tools
import cufflinks
import plotly
from IPython.core.interactiveshell import InteractiveShell
# Show the value of every expression statement in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = 'all'
# Put cufflinks and plotly into offline (in-notebook) rendering mode.
# NOTE(review): connected=True presumably loads plotly.js from the CDN instead of
# embedding it in the notebook, so viewing needs network access — confirm.
cufflinks.go_offline(connected=True)
init_notebook_mode(connected=True)

In [2]:
# Season codes as they appear in the CSV file names, oldest season first.
# "0910" is the 2009-10 season, "1819" is 2018-19.
SEASON_CODES = ["0910", "1011", "1112", "1213", "1314",
                "1415", "1516", "1617", "1718", "1819"]

# Read every season with one loop instead of ten copy-pasted calls. The
# league_NN names (NN = year the season ended) are kept because later cells
# refer to them directly.
(league_10, league_11, league_12, league_13, league_14,
 league_15, league_16, league_17, league_18, league_19) = [
    pd.read_csv("data/season-{}_csv.csv".format(code)) for code in SEASON_CODES
]

In [3]:
# Every loaded season in chronological order (2009-10 first, 2018-19 last).
leagues = [
    league_10,
    league_11,
    league_12,
    league_13,
    league_14,
    league_15,
    league_16,
    league_17,
    league_18,
    league_19,
]

In [4]:
# Convert each season's Date column to real datetimes.
# Assign the column directly rather than through `.loc[:, "Date"]`: on recent
# pandas the `.loc[:, col] = ...` form writes values into the existing column
# in place, so the column would keep its original `object` dtype.
for lge in leagues:
    lge["Date"] = pd.to_datetime(lge["Date"])

In [5]:
# Preview the first rows of the 2018-19 season to check the loaded columns.
league_19.head(n=5)

Unnamed: 0,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,Referee,...,HST,AST,HF,AF,HC,AC,HY,AY,HR,AR
0,2018-08-10,Man United,Leicester,2,1,H,1,0,H,A Marriner,...,6,4,11,8,2,5,2,1,0,0
1,2018-08-11,Bournemouth,Cardiff,2,0,H,1,0,H,K Friend,...,4,1,11,9,7,4,1,1,0,0
2,2018-08-11,Fulham,Crystal Palace,0,2,A,0,1,A,M Dean,...,6,9,9,11,5,5,1,2,0,0
3,2018-08-11,Huddersfield,Chelsea,0,3,A,0,2,A,C Kavanagh,...,1,4,9,8,2,5,2,1,0,0
4,2018-08-11,Newcastle,Tottenham,1,2,A,1,2,A,M Atkinson,...,2,5,11,12,3,5,2,2,0,0


# Referee Investigation

In [6]:
# One table per season, indexed by referee, with the home/away card totals
# (HR/AR for red cards, HY/AY for yellow cards) for that season.
red = [season.groupby("Referee")[["HR", "AR"]].sum() for season in leagues]
yellow = [season.groupby("Referee")[["HY", "AY"]].sum() for season in leagues]

In [7]:
def _combine_seasons(per_season):
    """Align per-season card tables side by side, one row per referee.

    Parameters
    ----------
    per_season : list of DataFrame
        One table per season, each indexed by referee name.

    Returns
    -------
    DataFrame
        Indexed by every referee that appears in *any* season, sorted by name.
        Columns are keyed by (season position, original column), so labels
        never collide. A referee absent from a season gets NaN there.
    """
    # The previous chain of left joins started from the first season, which
    # silently dropped every referee who did not officiate in that season.
    # The repeated "_left"/"_right" suffixes also produced duplicate column
    # labels. Concatenating along the columns aligns on the union of referees
    # and keeps the labels unique.
    combined = pd.concat(per_season, axis=1, keys=range(len(per_season)))
    return combined.sort_index()


base_red = _combine_seasons(red)
base_yellow = _combine_seasons(yellow)

In [8]:
# A referee missing from a season has NaN in that season's columns after the
# tables are combined; treat those gaps as zero cards.
base_red, base_yellow = base_red.fillna(0), base_yellow.fillna(0)

In [9]:
# Add up every season's home and away columns to get one total per referee.
reds = base_red.sum(axis="columns")
yellows = base_yellow.sum(axis="columns")

In [10]:
# "Mn Atkinson" is the same referee under a different spelling, so his cards
# should be credited to the canonical entry rather than discarded.
# NOTE(review): the canonical spelling is presumably "M Atkinson" (the form
# seen in the data preview) — confirm.
# Rename-then-regroup is also safe to re-run: unlike an in-place drop, it does
# not raise when the alias is no longer in the index.
REFEREE_ALIASES = {"Mn Atkinson": "M Atkinson"}
reds = reds.rename(index=REFEREE_ALIASES).groupby(level=0).sum()
yellows = yellows.rename(index=REFEREE_ALIASES).groupby(level=0).sum()

In [11]:
# Per-referee summary table: red-card totals first, then the yellow-card
# totals aligned onto the same referee index.
refs = reds.to_frame(name="Red_Cards").assign(Yellow_Cards=yellows)

In [12]:
def _card_bar(counts, label, colour):
    """Return a bar trace of `counts` per referee, labelled and coloured as given."""
    return go.Bar(x=refs.index, y=counts, name=label, marker={"color": colour})


yellow_trace = _card_bar(refs.Yellow_Cards, 'Yellow Cards', 'yellow')
red_trace = _card_bar(refs.Red_Cards, 'Red Cards', 'red')

# Yellow first, so it is drawn in the left-hand subplot.
traces = [yellow_trace, red_trace]

In [13]:
# One row with two panels: each trace gets its own subplot, because yellow
# and red card counts are on very different scales.
fig = tools.make_subplots(rows=1, cols=2)
for col, trace in enumerate(traces, start=1):
    fig.append_trace(trace, 1, col)

# Label the axes of BOTH panels. Previously only the first subplot's y-axis
# had a title, and neither x-axis was labelled.
fig.layout.update(
    title="C'MON REF!",
    xaxis=dict(title="Referee"),
    yaxis=dict(title="Count"),
    xaxis2=dict(title="Referee"),
    yaxis2=dict(title="Count"),
)
iplot(fig)

This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]



# Discussion:
As an avid soccer fan and someone who has followed the Premier League (PL) for many years, I decided to download the past 10 seasons of PL data from a free online source. The dataset includes stats for each game of the season such as home team, away team, full-time score, halftime score, assists, referees, etc. The PL is the top-flight soccer league in England, where many of the best players in the world perform every week from August through May. One of many influential factors in any sport is the referee. In soccer, the referee penalizes players for particularly bad fouls with either a yellow or a red card. A yellow card serves as a warning, while a red card forces the player to leave the game and miss the next game, and the team must play the rest of the game one person short. In addition, if a red card is issued, the league officials can review the incident after the game to determine if further punishment is required, such as a ban for an additional number of games and/or a fine. Also, if a player receives two yellow cards in the same game, the player is then shown a red card with the same implications (usually without the post-game review, as this was most likely a less severe foul). Therefore, the referee has a great influence on the course of the game. The bar plots above show how many yellow and red cards each referee has shown to players over the past 10 years. As we can see, some referees are very particular about showing cards, while some hand out cards like candy on Halloween. For example, C Foy has given 363 yellow cards and 20 red cards. On the other hand, M Dean has shown 1058 yellow cards and 54 red cards. However, one problem with these bar plots is that they do not account for a referee retiring, or another starting, in the course of these 10 years. Thus, some referees might look like they almost never give cards, such as A Wiley, who has recorded only 81 yellow cards and 5 red cards. 
I would suspect that A Wiley either retired soon after 2009 or started his PL referee career within the past year. Furthermore, it is interesting that we see almost the same pattern in both plots: referees with high yellow card counts relative to other referees also have high red card counts relative to other referees. Another takeaway from these plots is that referees clearly do not officiate uniformly and can be very strict or very lenient. So, as a player, especially one who tends to foul often, it would be useful to know who is officiating the game.