# Mod 2 Summative Project - Soccer Match DB Transfer and Analysis

In [1]:
from SqlConn import SqlConn
from MongoHandler import MongoHandler
from api_pull import api_pull
import time
import numpy as np
import json
import requests
import pandas as pd
import datetime
from matplotlib.image import imread
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
sql = SqlConn('database.sqlite')

Connection status: Active


In [3]:
df = sql.matches_df(2011)
df.head()

Connection status: Active


Unnamed: 0,Match_ID,Div,Season,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR
0,1133,D2,2011,2011-07-15,Cottbus,Dresden,2,1,H
1,1167,D2,2011,2011-07-15,Greuther Furth,Ein Frankfurt,2,3,A
2,1551,D2,2011,2011-07-15,Frankfurt FSV,Union Berlin,1,1,D
3,1550,D2,2011,2011-07-16,Erzgebirge Aue,Aachen,1,0,H
4,1678,D2,2011,2011-07-16,St Pauli,Ingolstadt,2,0,H


In [4]:
sql.close_conn()

Closing connection
Connection status: Closed


In [5]:
def sum_goals(team,home_or_away):
    if home_or_away == 'home':
        return df.groupby('HomeTeam')['FTHG'].sum().loc[team]
    else:
        return df.groupby('AwayTeam')['FTAG'].sum().loc[team]
    
df['tot_home_goals'] = df.HomeTeam.apply(lambda team: sum_goals(team,'home'))
df.head()

Unnamed: 0,Match_ID,Div,Season,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,tot_home_goals
0,1133,D2,2011,2011-07-15,Cottbus,Dresden,2,1,H,18
1,1167,D2,2011,2011-07-15,Greuther Furth,Ein Frankfurt,2,3,A,46
2,1551,D2,2011,2011-07-15,Frankfurt FSV,Union Berlin,1,1,D,20
3,1550,D2,2011,2011-07-16,Erzgebirge Aue,Aachen,1,0,H,19
4,1678,D2,2011,2011-07-16,St Pauli,Ingolstadt,2,0,H,36


In [6]:
df['HomeWin'] = df.FTR.map({'H':1, 'A':0, 'D':0})
df['AwayWin'] = df.FTR.map({'H':0, 'A':1, 'D':0})
wins = df.groupby('HomeTeam').HomeWin.sum() + df.groupby('AwayTeam').AwayWin.sum()

df['tot_home_win'] = df.HomeTeam.apply(lambda team: wins[team])


df['HomeLoss'] = df.FTR.map({'H':0, 'A':1, 'D':0})
df['AwayLoss'] = df.FTR.map({'H':1, 'A':0, 'D':0})
losses = df.groupby('HomeTeam').HomeLoss.sum() + df.groupby('AwayTeam').AwayLoss.sum()

df['tot_home_loss'] = df.HomeTeam.apply(lambda team: losses[team])


df['HomeDraw'] = df.FTR.map({'H':0, 'A':0, 'D':1})
df['AwayDraw'] = df.FTR.map({'H':0, 'A':0, 'D':1})
draws = df.groupby('HomeTeam').HomeDraw.sum() + df.groupby('AwayTeam').AwayDraw.sum()

df['tot_home_draw'] = df.HomeTeam.apply(lambda team: draws[team])

df = df.drop(['HomeWin', 'AwayWin', 'HomeLoss', 'AwayLoss', 'HomeDraw', 'AwayDraw'], axis=1)

df.head()

Unnamed: 0,Match_ID,Div,Season,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,tot_home_goals,tot_home_win,tot_home_loss,tot_home_draw
0,1133,D2,2011,2011-07-15,Cottbus,Dresden,2,1,H,18,8,15,11
1,1167,D2,2011,2011-07-15,Greuther Furth,Ein Frankfurt,2,3,A,46,20,4,10
2,1551,D2,2011,2011-07-15,Frankfurt FSV,Union Berlin,1,1,D,20,7,13,14
3,1550,D2,2011,2011-07-16,Erzgebirge Aue,Aachen,1,0,H,19,8,15,11
4,1678,D2,2011,2011-07-16,St Pauli,Ingolstadt,2,0,H,36,18,8,8


In [7]:
df_goals_wins = df.groupby('HomeTeam')[['Season', 'tot_home_goals', 'tot_home_win', 'tot_home_loss', 'tot_home_draw']].first()
df_goals_wins.index.rename('Team', inplace=True)
df_goals_wins.columns = ['Season', 'GoalsScored', 'Wins', 'Losses', 'Draws']
df_goals_wins.head()

Unnamed: 0_level_0,Season,GoalsScored,Wins,Losses,Draws
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Aachen,2011,15,6,15,13
Augsburg,2011,20,8,12,14
Bayern Munich,2011,49,23,7,4
Bochum,2011,23,10,17,7
Braunschweig,2011,21,10,9,15


In [8]:
df_goals_wins['RainGames'] = 2
df_goals_wins['RainWins'] = 1
df_goals_wins['NonRainWins'] = 1
df_goals_wins['RainWin%'] = 0.5
df_goals_wins['NonRainWin%'] = 0.5
df_goals_wins['%ChangeWinWithRain'] = 0.5
df_goals_wins.head()

Unnamed: 0_level_0,Season,GoalsScored,Wins,Losses,Draws,RainGames,RainWins,NonRainWins,RainWin%,NonRainWin%,%ChangeWinWithRain
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Aachen,2011,15,6,15,13,2,1,1,0.5,0.5,0.5
Augsburg,2011,20,8,12,14,2,1,1,0.5,0.5,0.5
Bayern Munich,2011,49,23,7,4,2,1,1,0.5,0.5,0.5
Bochum,2011,23,10,17,7,2,1,1,0.5,0.5,0.5
Braunschweig,2011,21,10,9,15,2,1,1,0.5,0.5,0.5


In [9]:
# RainGames    RainWins    NonRainWins    RainWin%    NonRainWin%    %ChangeWinWithRain

In [10]:
season = str(int(df_goals_wins.iloc[0].Season))
cols = ['Wins', 'Losses', 'Draws']
bar_x = [season+'_Wins', season+'_Losses', season+'_Draws']

for i in range(len(df_goals_wins)):
    bar_y = list(df_goals_wins[cols].iloc[i])
    team_name = df_goals_wins.index[i]
    plt.bar(x=bar_x, height=bar_y)
    plt.title(team_name)
    plt.savefig('hist_images/{}.png'.format(team_name))
    plt.clf()

<Figure size 432x288 with 0 Axes>

In [11]:
df_goals_wins['graph'] = [imread('hist_images/{}.png'.format(team_name)).tolist() for team_name in df_goals_wins.index]
df_goals_wins.head()

Unnamed: 0_level_0,Season,GoalsScored,Wins,Losses,Draws,RainGames,RainWins,NonRainWins,RainWin%,NonRainWin%,%ChangeWinWithRain,graph
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Aachen,2011,15,6,15,13,2,1,1,0.5,0.5,0.5,"[[[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], ..."
Augsburg,2011,20,8,12,14,2,1,1,0.5,0.5,0.5,"[[[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], ..."
Bayern Munich,2011,49,23,7,4,2,1,1,0.5,0.5,0.5,"[[[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], ..."
Bochum,2011,23,10,17,7,2,1,1,0.5,0.5,0.5,"[[[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], ..."
Braunschweig,2011,21,10,9,15,2,1,1,0.5,0.5,0.5,"[[[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], ..."


### Mongo DB

Please ensure that your Mongo Database has been opened through the terminal for smooth operation.

In [12]:
client = MongoHandler("mongodb://127.0.0.1:27017/")

In [13]:
collection = client.make_collection('team_stats_db', 'team_stats_collection')
collection

Now in the team_stats_collection collection in the team_stats_db database


Collection(Database(MongoClient(host=['127.0.0.1:27017'], document_class=dict, tz_aware=False, connect=True), 'team_stats_db'), 'team_stats_collection')

In [14]:
client.clear_collection('team_stats_db','team_stats_collection')

Collection team_stats_collection cleared


In [15]:
results = collection.insert_many(MongoHandler.list_of_dicts(df_goals_wins))
results.inserted_ids

[ObjectId('5d1a113a2c3cc7ab3ab87011'),
 ObjectId('5d1a113a2c3cc7ab3ab87012'),
 ObjectId('5d1a113a2c3cc7ab3ab87013'),
 ObjectId('5d1a113a2c3cc7ab3ab87014'),
 ObjectId('5d1a113a2c3cc7ab3ab87015'),
 ObjectId('5d1a113a2c3cc7ab3ab87016'),
 ObjectId('5d1a113a2c3cc7ab3ab87017'),
 ObjectId('5d1a113a2c3cc7ab3ab87018'),
 ObjectId('5d1a113a2c3cc7ab3ab87019'),
 ObjectId('5d1a113a2c3cc7ab3ab8701a'),
 ObjectId('5d1a113a2c3cc7ab3ab8701b'),
 ObjectId('5d1a113a2c3cc7ab3ab8701c'),
 ObjectId('5d1a113a2c3cc7ab3ab8701d'),
 ObjectId('5d1a113a2c3cc7ab3ab8701e'),
 ObjectId('5d1a113a2c3cc7ab3ab8701f'),
 ObjectId('5d1a113a2c3cc7ab3ab87020'),
 ObjectId('5d1a113a2c3cc7ab3ab87021'),
 ObjectId('5d1a113a2c3cc7ab3ab87022'),
 ObjectId('5d1a113a2c3cc7ab3ab87023'),
 ObjectId('5d1a113a2c3cc7ab3ab87024'),
 ObjectId('5d1a113a2c3cc7ab3ab87025'),
 ObjectId('5d1a113a2c3cc7ab3ab87026'),
 ObjectId('5d1a113a2c3cc7ab3ab87027'),
 ObjectId('5d1a113a2c3cc7ab3ab87028'),
 ObjectId('5d1a113a2c3cc7ab3ab87029'),
 ObjectId('5d1a113a2c3cc7

In [16]:
query = client.query_db('team_stats_db', 'team_stats_collection')
for item in query:
    print(item)

{'name': 'Aachen', '_Season': 2011, '_GoalsScored': 15, '_Wins': 6, '_Losses': 15, '_Draws': 13, '_RainGames': 2, '_RainWins': 1, '_NonRainWins': 1, '_RainWin%': 0.5, '_NonRainWin%': 0.5, '_%ChangeWinWithRain': 0.5}
{'name': 'Augsburg', '_Season': 2011, '_GoalsScored': 20, '_Wins': 8, '_Losses': 12, '_Draws': 14, '_RainGames': 2, '_RainWins': 1, '_NonRainWins': 1, '_RainWin%': 0.5, '_NonRainWin%': 0.5, '_%ChangeWinWithRain': 0.5}
{'name': 'Bayern Munich', '_Season': 2011, '_GoalsScored': 49, '_Wins': 23, '_Losses': 7, '_Draws': 4, '_RainGames': 2, '_RainWins': 1, '_NonRainWins': 1, '_RainWin%': 0.5, '_NonRainWin%': 0.5, '_%ChangeWinWithRain': 0.5}
{'name': 'Bochum', '_Season': 2011, '_GoalsScored': 23, '_Wins': 10, '_Losses': 17, '_Draws': 7, '_RainGames': 2, '_RainWins': 1, '_NonRainWins': 1, '_RainWin%': 0.5, '_NonRainWin%': 0.5, '_%ChangeWinWithRain': 0.5}
{'name': 'Braunschweig', '_Season': 2011, '_GoalsScored': 21, '_Wins': 10, '_Losses': 9, '_Draws': 15, '_RainGames': 2, '_RainWi

# Working Space

In [None]:
# The name of the team
# The total number of goals scored by the team during the 2011 season
# The total number of wins the team earned during the 2011 season
# A histogram visualization of the team's wins and losses for the 2011 season (store the visualization directly)
# The team's win percentage on days where it was raining during games in the 2011 season.

In [None]:
# Query the SQL database
# Calculate summary statistics
# Get the weather data from the DarkSky API
# Load the data into MongoDB

In [None]:
berlin_coordinates = 52.5200, 13.4050

In [None]:
class MongoHandler():
    pass

In [None]:
class WeatherGetter():
    pass

In [None]:
from PIL import Image
from IPython.display import HTML

df_goals_wins['graph'] = ['<img src="hist_images/{}.png"/>'.format(team_name) for team_name in df_goals_wins.index]
HTML(df_goals_wins.head().to_html(escape=False))