# Mod 2 Summative Project - Soccer Match DB Transfer and Analysis

In [32]:
import sqlite3
import time
import pymongo
import numpy as np
import json
import requests
import pandas as pd
import datetime
from PIL import Image
from IPython.display import HTML
from matplotlib.image import imread
from pandasql import sqldf
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
conn = sqlite3.connect('database.sqlite')
c = conn.cursor()

In [3]:
c.execute("""SELECT *
             FROM Matches
             WHERE Season IN (2011) AND Div IN ('D1','D2') 
             ORDER BY Date
             """)

df = pd.DataFrame(c.fetchall())
df.columns = [x[0] for x in c.description]
df.head()

Unnamed: 0,Match_ID,Div,Season,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR
0,1133,D2,2011,2011-07-15,Cottbus,Dresden,2,1,H
1,1167,D2,2011,2011-07-15,Greuther Furth,Ein Frankfurt,2,3,A
2,1551,D2,2011,2011-07-15,Frankfurt FSV,Union Berlin,1,1,D
3,1550,D2,2011,2011-07-16,Erzgebirge Aue,Aachen,1,0,H
4,1678,D2,2011,2011-07-16,St Pauli,Ingolstadt,2,0,H


In [4]:
def sum_goals(team,home_or_away):
    if home_or_away == 'home':
        return df.groupby('HomeTeam')['FTHG'].sum().loc[team]
    else:
        return df.groupby('AwayTeam')['FTAG'].sum().loc[team]

In [5]:
df['tot_home_goals'] = df.HomeTeam.apply(lambda team: sum_goals(team,'home'))
df.head()

Unnamed: 0,Match_ID,Div,Season,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,tot_home_goals
0,1133,D2,2011,2011-07-15,Cottbus,Dresden,2,1,H,18
1,1167,D2,2011,2011-07-15,Greuther Furth,Ein Frankfurt,2,3,A,46
2,1551,D2,2011,2011-07-15,Frankfurt FSV,Union Berlin,1,1,D,20
3,1550,D2,2011,2011-07-16,Erzgebirge Aue,Aachen,1,0,H,19
4,1678,D2,2011,2011-07-16,St Pauli,Ingolstadt,2,0,H,36


In [7]:
dict_of_win = {}
for i in list(df.AwayTeam.unique()):
    dict_of_win.update({i: 0})
for n in range(len(df)):
    if df.FTR.iloc[n] == 'H':
        dict_of_win[df.HomeTeam.iloc[n]] += 1
    elif df.FTR.iloc[n] == 'A':
        dict_of_win[df.AwayTeam.iloc[n]] += 1

In [8]:
dict_of_loss = {}
for i in list(df.AwayTeam.unique()):
    dict_of_loss.update({i: 0})
for n in range(len(df)):
    if df.FTR.iloc[n] == 'H':
        dict_of_loss[df.AwayTeam.iloc[n]] += 1
    elif df.FTR.iloc[n] == 'A':
        dict_of_loss[df.HomeTeam.iloc[n]] += 1

In [9]:
dict_of_draw = {}
for i in list(df.AwayTeam.unique()):
    dict_of_draw.update({i: 0})
for n in range(len(df)):
    if df.FTR.iloc[n] == 'D':
        dict_of_draw[df.AwayTeam.iloc[n]] += 1
        dict_of_draw[df.HomeTeam.iloc[n]] += 1

In [10]:
df['tot_home_win'] = df.HomeTeam.apply(lambda team: dict_of_win[team])
df.head()

Unnamed: 0,Match_ID,Div,Season,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,tot_home_goals,tot_home_win
0,1133,D2,2011,2011-07-15,Cottbus,Dresden,2,1,H,18,8
1,1167,D2,2011,2011-07-15,Greuther Furth,Ein Frankfurt,2,3,A,46,20
2,1551,D2,2011,2011-07-15,Frankfurt FSV,Union Berlin,1,1,D,20,7
3,1550,D2,2011,2011-07-16,Erzgebirge Aue,Aachen,1,0,H,19,8
4,1678,D2,2011,2011-07-16,St Pauli,Ingolstadt,2,0,H,36,18


In [11]:
df['tot_home_loss'] = df.HomeTeam.apply(lambda team: dict_of_loss[team])
df.head()

Unnamed: 0,Match_ID,Div,Season,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,tot_home_goals,tot_home_win,tot_home_loss
0,1133,D2,2011,2011-07-15,Cottbus,Dresden,2,1,H,18,8,15
1,1167,D2,2011,2011-07-15,Greuther Furth,Ein Frankfurt,2,3,A,46,20,4
2,1551,D2,2011,2011-07-15,Frankfurt FSV,Union Berlin,1,1,D,20,7,13
3,1550,D2,2011,2011-07-16,Erzgebirge Aue,Aachen,1,0,H,19,8,15
4,1678,D2,2011,2011-07-16,St Pauli,Ingolstadt,2,0,H,36,18,8


In [12]:
df['tot_home_draw'] = df.HomeTeam.apply(lambda team: dict_of_draw[team])
df.head()

Unnamed: 0,Match_ID,Div,Season,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,tot_home_goals,tot_home_win,tot_home_loss,tot_home_draw
0,1133,D2,2011,2011-07-15,Cottbus,Dresden,2,1,H,18,8,15,11
1,1167,D2,2011,2011-07-15,Greuther Furth,Ein Frankfurt,2,3,A,46,20,4,10
2,1551,D2,2011,2011-07-15,Frankfurt FSV,Union Berlin,1,1,D,20,7,13,14
3,1550,D2,2011,2011-07-16,Erzgebirge Aue,Aachen,1,0,H,19,8,15,11
4,1678,D2,2011,2011-07-16,St Pauli,Ingolstadt,2,0,H,36,18,8,8


In [13]:
df_goals_wins = df.groupby('HomeTeam')[['tot_home_goals', 'tot_home_win', 'tot_home_loss', 'tot_home_draw']].first()
df_goals_wins.head()

Unnamed: 0_level_0,tot_home_goals,tot_home_win,tot_home_loss,tot_home_draw
HomeTeam,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Aachen,15,6,15,13
Augsburg,20,8,12,14
Bayern Munich,49,23,7,4
Bochum,23,10,17,7
Braunschweig,21,10,9,15


In [14]:
cols = ['tot_home_win', 'tot_home_loss', 'tot_home_draw']
bar_x = ['2011_Win', '2011_Loss', '2011_Draw']

for i in range(len(df_goals_wins)):
    bar_y = list(df_goals_wins[cols].iloc[i])
    team_name = df_goals_wins.index[i]
    plt.bar(x=bar_x, height=bar_y)
    plt.title(team_name)
    plt.savefig('hist_images/{}.png'.format(team_name))
    plt.clf()

<Figure size 432x288 with 0 Axes>

In [25]:
df_goals_wins['graph'] = [imread('hist_images/{}.png'.format(team_name)).tolist() for team_name in df_goals_wins.index]
df_goals_wins.head()

Unnamed: 0_level_0,tot_home_goals,tot_home_win,tot_home_loss,tot_home_draw,graph
HomeTeam,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Aachen,15,6,15,13,"[[[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], ..."
Augsburg,20,8,12,14,"[[[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], ..."
Bayern Munich,49,23,7,4,"[[[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], ..."
Bochum,23,10,17,7,"[[[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], ..."
Braunschweig,21,10,9,15,"[[[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], ..."


In [26]:
type(df_goals_wins.iloc[0].graph)

list

### Mongo DB

In [27]:
myclient = pymongo.MongoClient("mongodb://127.0.0.1:27017/")
mydb = myclient['team_stats_db']

In [28]:
mycollection = mydb['team_stats_collection']

In [29]:
mycollection.delete_many({})
list_of_teams = []
for i in range(len(df_goals_wins)):
    team_to_add = {'name': df_goals_wins.index[i], 
                   '2011_goals': int(df_goals_wins.iloc[i].tot_home_goals),
                   '2011_wins': int(df_goals_wins.iloc[i].tot_home_win),
                   '2011_loss': int(df_goals_wins.iloc[i].tot_home_loss),
                   '2011_draw': int(df_goals_wins.iloc[i].tot_home_draw),
                   '2011_graph': df_goals_wins.iloc[i].graph
                  }
    list_of_teams.append(team_to_add)
    time.sleep(0.5)
    
results = mycollection.insert_many(list_of_teams)
results.inserted_ids

[ObjectId('5d1655283718a7841c12a208'),
 ObjectId('5d1655283718a7841c12a209'),
 ObjectId('5d1655283718a7841c12a20a'),
 ObjectId('5d1655283718a7841c12a20b'),
 ObjectId('5d1655283718a7841c12a20c'),
 ObjectId('5d1655283718a7841c12a20d'),
 ObjectId('5d1655283718a7841c12a20e'),
 ObjectId('5d1655283718a7841c12a20f'),
 ObjectId('5d1655283718a7841c12a210'),
 ObjectId('5d1655283718a7841c12a211'),
 ObjectId('5d1655283718a7841c12a212'),
 ObjectId('5d1655283718a7841c12a213'),
 ObjectId('5d1655283718a7841c12a214'),
 ObjectId('5d1655283718a7841c12a215'),
 ObjectId('5d1655283718a7841c12a216'),
 ObjectId('5d1655283718a7841c12a217'),
 ObjectId('5d1655283718a7841c12a218'),
 ObjectId('5d1655283718a7841c12a219'),
 ObjectId('5d1655283718a7841c12a21a'),
 ObjectId('5d1655283718a7841c12a21b'),
 ObjectId('5d1655283718a7841c12a21c'),
 ObjectId('5d1655283718a7841c12a21d'),
 ObjectId('5d1655283718a7841c12a21e'),
 ObjectId('5d1655283718a7841c12a21f'),
 ObjectId('5d1655283718a7841c12a220'),
 ObjectId('5d1655283718a7

In [33]:
query = mycollection.find({})
for item in query:
    print(item)
    time.sleep(0.5)

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [None]:
# The name of the team
# The total number of goals scored by the team during the 2011 season
# The total number of wins the team earned during the 2011 season
# A histogram visualization of the team's wins and losses for the 2011 season (store the visualization directly)
# The team's win percentage on days where it was raining during games in the 2011 season.

In [None]:
# Query the SQL database
# Calculate summary statistics
# Get the weather data from the DarkSky API
# Load the data into MongoDB

In [None]:
class WeatherGetter():
    pass

In [None]:
class MongoHandler():
    pass

In [None]:
berlin_coordinates = 52.5200, 13.4050

In [None]:
from PIL import Image
from IPython.display import HTML

df_goals_wins['graph'] = ['<img src="hist_images/{}.png"/>'.format(team_name) for team_name in df_goals_wins.index]
HTML(df_goals_wins.head().to_html(escape=False))

In [None]:
c.execute("""SELECT contactFirstName, contactLastName, phone, addressLine1, creditLimit
             FROM customers
             WHERE state = 'CA'
             AND creditLimit > 25000""")
df = pd.DataFrame(c.fetchall())
df.columns = [x[0] for x in c.description]
df

In [None]:
# df['tot_away_goals'] = df.AwayTeam.apply(lambda team: sum_goals(team,'away'))
# df.head()

In [None]:
# df.groupby('HomeTeam').FTR.map({'H':1, 'A':0, 'D':0}).sum()

In [None]:
c.close()
conn.close()