In [None]:
import urllib.request
import time
import random
from bs4 import BeautifulSoup

#Used to parse through HTML to find data on ESPN.com
def parse_html(html):
    """Extract the cell text of the first table in an HTML document.

    Args:
        html: HTML markup to hand to BeautifulSoup's 'html.parser'.

    Returns:
        A list with one entry per <tr> found in the first <table>. Each entry
        is the list of that row's <td> texts, in document order. A row that
        has no <td> cells contributes an empty list.

    Raises:
        ValueError: if the document contains no <table> element.
    """
    soup = BeautifulSoup(html, 'html.parser')

    # find() returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on None further down.
    tbl = soup.find('table')
    if tbl is None:
        raise ValueError('no <table> element found in HTML')

    return [
        [column.text for column in row.find_all('td')]
        for row in tbl.find_all('tr')
    ]
                         

# ESPN passing leaderboard sorted by passing yards; the season is appended per
# request so each dataset is pinned to an explicit year rather than to
# whatever season the un-yeared page currently shows.
STATS_URL = 'http://www.espn.com/nfl/statistics/player/_/stat/passing/sort/passingYards/year/'


def _download_player_rows(url):
    """Download one stats page and return its table rows without header rows.

    Args:
        url: address of the page to fetch.

    Returns:
        The rows produced by parse_html(), minus empty rows and the repeated
        header rows whose first cell is 'RK'.
    """
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(url) as filehandle:
        html = filehandle.read()

    # Build a filtered list instead of deleting from the list while iterating
    # over it, which skips the element following every deletion.
    return [row for row in parse_html(html) if row and row[0] != 'RK']


season_rows = {}
for y in range(2015, 2017):

    # Random pause of up to five seconds before each request.
    tmp = random.random() * 5.0
    print('Sleep for ', tmp, ' seconds')
    time.sleep(tmp)

    url = STATS_URL + str(y)
    season_rows[y] = _download_player_rows(url)

    print('Download from :', url)

# data1 feeds the *2015 variables further down and data2 the *2016 ones, so
# bind each name to the matching season.
data1 = season_rows[2015]
data2 = season_rows[2016]
    
#Make, for each dataset, the list of touchdown totals and a dictionary
#{touchdown total: number of players with that total} for players with 20+
def _touchdown_summary(rows, minimum=20):
    """Summarise the touchdown column of a list of player rows.

    Args:
        rows: player rows as lists of cell strings. Index 9 is read as the
            touchdown total (NOTE(review): confirm against the page layout).
        minimum: smallest touchdown total that is counted in the tally.

    Returns:
        A (totals, tally) pair. totals holds every row's touchdown total as
        an int, in row order. tally maps each total >= minimum to the number
        of rows with that total.
    """
    # Convert once; int keys keep the tally numeric for plotting later on.
    totals = [int(row[9]) for row in rows]

    tally = {}
    for td in totals:
        if td >= minimum:
            tally[td] = tally.get(td, 0) + 1
    return totals, tally


touch2015, year2015 = _touchdown_summary(data1)
touch2016, year2016 = _touchdown_summary(data2)
    
#Finds the probability distribution, mean, and variance of the outcomes
#This function then returns the values that will be plotted onto the graph

def probability_distribution(outcomes):
    """Return the empirical distribution, mean and variance of outcomes.

    Args:
        outcomes: iterable of hashable numbers (one observation each). It is
            consumed once, so a generator is accepted.

    Returns:
        A (touchdowns, average, variance) tuple. touchdowns maps each
        distinct outcome to its relative frequency (the values sum to 1),
        average is the arithmetic mean, and variance is the population
        variance (divided by the number of observations, not by n - 1).

    Raises:
        ValueError: if outcomes is empty, since mean and variance are
            undefined without observations.
    """
    values = list(outcomes)
    if not values:
        raise ValueError('outcomes must contain at least one observation')

    amount = float(len(values))

    ##count number of observations
    touchdowns = {}
    for x in values:
        touchdowns[x] = touchdowns.get(x, 0.0) + 1.0

    average = sum(values) / amount
    # Two-pass form: a sum of squared deviations can never be negative,
    # unlike E[x^2] - E[x]^2, which loses precision to cancellation.
    variance = sum((x - average) ** 2 for x in values) / amount

    ##normalize pdf
    for x in touchdowns:
        touchdowns[x] /= amount

    return touchdowns, average, variance

from scipy.stats import poisson

# For each season's touchdown totals: the normalized frequency dict, the
# average and the variance returned by probability_distribution().
tds15, av15, var15 = probability_distribution(touch2015)
tds16, av16, var16 = probability_distribution(touch2016)


import matplotlib.pyplot as plt # Import matplotlib for graphs and visualizations

#This section of code creates the first bar graph: the empirical 2015
#touchdown distribution with a Poisson curve of the same mean drawn over it

plt.figure(figsize=(10,10))
plt.rc('text', usetex=True)  # render text through LaTeX
plt.rc('font', size=24)
plt.rcParams['xtick.major.pad'] = 10
plt.rcParams['ytick.major.pad'] = 10

# Second title line shows the sample mean and variance as a math expression.
title = '2015 Quarterback Touchdowns\n'
title += '$\\langle x \\rangle = ' + '% .2f' % av15 + ' \\quad \\sigma^2 = ' + '% .2f' % var15 + '$'
plt.title(title, fontsize = 30)


plt.xlabel('Touchdowns', color='b')
plt.ylabel('Probability Distribution', color='b')


# Sort the observed touchdown totals: plt.plot joins points in the order
# given, so an unsorted x would make the Poisson line zig-zag across the plot.
x = sorted(tds15)
Px = [tds15[q] for q in x]


plt.bar(x, Px, width=1, color = 'blue', align='center', alpha=0.5)

# Poisson pmf with the sample mean as its rate, evaluated at the observed totals.
plt.plot(x, poisson.pmf(x, av15), linestyle='-', linewidth=2.0, color='k', label='2015')
print()
print()
plt.show()

# Second figure: bar chart of the empirical 2016 touchdown distribution, so
# the two seasons can be compared side by side.
plt.figure(figsize=(10, 10))
plt.rc('text', usetex=True)
plt.rc('font', size=24)
plt.rcParams.update({'xtick.major.pad': 10, 'ytick.major.pad': 10})

# Title: season heading plus a math line with the sample mean and variance.
stats_line = (
    '$\\langle x \\rangle = ' + '% .2f' % av16
    + ' \\quad \\sigma^2 = ' + '% .2f' % var16 + '$'
)
title = '2016 Quarterback Touchdowns\n' + stats_line
plt.title(title, fontsize=30)

plt.xlabel('Touchdowns', color='b')
plt.ylabel('Probability Distribution', color='b')

# Parallel lists for the bars: observed totals and their relative frequencies.
x = list(tds16.keys())
Px = list(tds16.values())

plt.bar(x, Px, width=1, color='green', align='center', alpha=0.5)

print()
print()
plt.show()


#Having these two charts, we were able to compare them and determine in which year the NFL quarterbacks performed better
#In conclusion, 2016 was a much better year, where four QBs threw for 35 touchdowns


In [None]:
#Here we create a graph with both 2015 and 2016 data to get a better comparison
# np is used below; import it here so this cell runs on its own.
import numpy as np

# Keys are touchdown totals and values are how many players reached each
# total. Coerce the keys to int so the x axis is numeric rather than a
# categorical axis of strings.
x = np.array([int(k) for k in year2015])
y = np.array(list(year2015.values()))

x2 = np.array([int(k) for k in year2016])
y2 = np.array(list(year2016.values()))


plt.figure(figsize=(10,10))
#plt.rc('text', usetex=True)
plt.rc('font', size=24)

title = 'Quarterback Touchdowns 2015 and 2016'
plt.title(title, fontsize = 35)

plt.xlabel('Touchdowns', fontsize=24, color='b') # Label text and its properties
plt.ylabel('Number of Quaterbacks', fontsize=24, color='b')

plt.ylim(ymax=5, ymin=0)
plt.tick_params(axis='both', which='major', labelsize=20)

plt.grid(True)

# Large translucent markers so points shared by both seasons stay visible.
plt.scatter(x,y, label='2015', s=1500, alpha=0.5, linewidth=0)
plt.scatter(x2,y2, label='2016', c='r', s=1500, alpha=0.5, linewidth=0)
# Assign the return value so the notebook does not echo the legend object.
_ = plt.legend(loc='upper left', fontsize=24, scatterpoints=1)


#After pulling the data from espn.com and parsing through the HTML to find the values we desired, we find that
#quarterbacks who threw over 20 touchdowns had much better performance.
#You can see in both graphs that those who excelled did so at a higher level than in the previous year.