# Mod 2 Summative Project - Soccer Match DB Transfer and Analysis

Importing necessary libraries for work below:

In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.image import imread

from dark_sky_api_remy import api_key as api_key
from MongoHandler import MongoHandler
from PandaHandler import PandaHandler
from RainDataImproved import RainData
from SqlConn import SqlConn
%matplotlib inline

#### Installing less common libraries used in this notebook
These are not part of the Anaconda distribution.

In [None]:
!pip timezonefinder

In [None]:
# geopy is not part of the Anaconda distribution, so install it with pip.
!pip install geopy

## Making a connection to the SQL Database and getting back a Pandas DataFrame

In [2]:
# Open a connection to the SQLite file `database.sqlite` via the project's SqlConn wrapper.
sql = SqlConn('database.sqlite')

Connection status: Active


In [3]:
# Pull the match rows into a DataFrame and preview the first five.
# The argument is presumably the list of seasons to include (here only 2011) — confirm in SqlConn.
df = sql.matches_df([2011])
df.head()

Connection status: Active


Unnamed: 0,Match_ID,Div,Season,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR
0,1133,D2,2011,2011-07-15,Cottbus,Dresden,2,1,H
1,1167,D2,2011,2011-07-15,Greuther Furth,Ein Frankfurt,2,3,A
2,1551,D2,2011,2011-07-15,Frankfurt FSV,Union Berlin,1,1,D
3,1550,D2,2011,2011-07-16,Erzgebirge Aue,Aachen,1,0,H
4,1678,D2,2011,2011-07-16,St Pauli,Ingolstadt,2,0,H


### Be sure to close your connection when done querying the Database!

In [4]:
# The match data now lives in `df`, so release the database connection.
sql.close_conn()

Closing connection
Connection status: Closed


## Working with the Pandas DataFrame - EDA

In [5]:
# Add the `tot_home_goals` column to the match DataFrame and preview the result.
# Presumably this is the season goal total of the row's home team — confirm in PandaHandler.
# `df` is reassigned here, so re-running this cell passes the already-augmented frame back in.
df = PandaHandler.tot_home_goals_scored(df)
df.head()

Unnamed: 0,Match_ID,Div,Season,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,tot_home_goals
0,1133,D2,2011,2011-07-15,Cottbus,Dresden,2,1,H,18
1,1167,D2,2011,2011-07-15,Greuther Furth,Ein Frankfurt,2,3,A,46
2,1551,D2,2011,2011-07-15,Frankfurt FSV,Union Berlin,1,1,D,20
3,1550,D2,2011,2011-07-16,Erzgebirge Aue,Aachen,1,0,H,19
4,1678,D2,2011,2011-07-16,St Pauli,Ingolstadt,2,0,H,36


In [6]:
# Add the win/loss/draw columns (HomeWin, AwayWin, tot_home_win, HomeLoss, AwayLoss,
# tot_home_loss, HomeDraw, AwayDraw, tot_home_draw) and preview the result.
# `df` is reassigned here, so re-running this cell passes the already-augmented frame back in.
df = PandaHandler.win_loss_draw(df)
df.head()

Unnamed: 0,Match_ID,Div,Season,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,tot_home_goals,HomeWin,AwayWin,tot_home_win,HomeLoss,AwayLoss,tot_home_loss,HomeDraw,AwayDraw,tot_home_draw
0,1133,D2,2011,2011-07-15,Cottbus,Dresden,2,1,H,18,1,0,8,0,1,15,0,0,11
1,1167,D2,2011,2011-07-15,Greuther Furth,Ein Frankfurt,2,3,A,46,0,1,20,1,0,4,0,0,10
2,1551,D2,2011,2011-07-15,Frankfurt FSV,Union Berlin,1,1,D,20,0,0,7,0,0,13,1,1,14
3,1550,D2,2011,2011-07-16,Erzgebirge Aue,Aachen,1,0,H,19,1,0,8,0,1,15,0,0,11
4,1678,D2,2011,2011-07-16,St Pauli,Ingolstadt,2,0,H,36,1,0,18,0,1,8,0,0,8


## Querying the Dark Sky API

Beware: this returns ~290 entries, and the Dark Sky API limits you to 1,000 queries per day. If you exceed the limit, you will need to use a different API key.

In [7]:
# Build the RainData helper from the dates of the first and last rows of `df`.
# Assumes `df` is ordered by Date so these two values bound the season — TODO confirm.
raindata = RainData(df.iloc[0].Date, df.iloc[-1].Date)

In [8]:
# Query Dark Sky using the imported API key; this call counts against the daily API quota.
raindf = raindata.get_rain_df(api_key)

In [9]:
# Preview the first rows of the rain data returned by the API query.
raindf.head()

Unnamed: 0,Date,Rain
0,2011-07-15,0
1,2011-07-16,0
2,2011-07-17,1
3,2011-07-18,1
4,2011-07-19,0


### Store the queried data from Dark Sky for use later 
(so if issues arise, you don't need to query again)

In [None]:
# Cache the API results on disk so the Dark Sky query does not have to be repeated.
raindf.to_pickle('pickled_rain_df.pkl')

Read the pickled Pandas DataFrame back in for use below

In [None]:
# Reload the cached rain data instead of querying the API again.
# NOTE(review): unpickling can execute arbitrary code — only load pickle files you created yourself.
raindf = pd.read_pickle('pickled_rain_df.pkl')

In [None]:
# Preview the reloaded rain data to confirm the cache round-trip worked.
raindf.head()

In [10]:
# Combine the match DataFrame with the rain data into a per-team summary and show ten rows.
# NOTE(review): the recorded output shows NaN for RainWins, NonRainWins and the percentage
# columns on every displayed row — verify PandaHandler.rain_results before relying on them.
df_all_data = PandaHandler.rain_results(df, raindf)
df_all_data.head(10)

Total Number of Games: 34


Unnamed: 0_level_0,Season,GoalsScored,Wins,Losses,Draws,RainGames,RainWins,NonRainWins,RainWin%,NonRainWin%,%ChangeWinWithRain
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Aachen,2011,15,6,15,13,0.0,,,,,
Augsburg,2011,20,8,12,14,0.0,,,,,
Bayern Munich,2011,49,23,7,4,0.0,,,,,
Bochum,2011,23,10,17,7,1.0,,,,,
Braunschweig,2011,21,10,9,15,1.0,,,,,
Cottbus,2011,18,8,15,11,0.0,,,,,
Dortmund,2011,44,25,3,6,0.0,,,,,
Dresden,2011,30,12,13,9,0.0,,,,,
Duisburg,2011,23,10,15,9,1.0,,,,,
Ein Frankfurt,2011,38,20,6,8,0.0,,,,,


### Create all images in local folder for input into MongoDB

In [None]:
# Render one Wins/Losses/Draws bar chart per team and save each as
# hist_images/<team name>.png.

# savefig does not create missing directories, so make sure the output folder exists
# before the loop; otherwise the first save fails on a fresh checkout.
os.makedirs('hist_images', exist_ok=True)

# The season is read from the first row and used as a prefix for the bar labels.
season = str(int(df_all_data.iloc[0].Season))
cols = ['Wins', 'Losses', 'Draws']
bar_x = [season+'_Wins', season+'_Losses', season+'_Draws']

for i in range(len(df_all_data)):
    bar_y = list(df_all_data[cols].iloc[i])
    team_name = df_all_data.index[i]
    plt.bar(x=bar_x, height=bar_y)
    plt.title(team_name)
    plt.savefig('hist_images/{}.png'.format(team_name))
    # Clear the shared figure so the next team's bars are not drawn over this one.
    plt.clf()

#### Turn each image into a NumPy array, then cast it to a list so MongoDB can store it

In [None]:
# Load every saved team chart back from disk and convert its pixel array to nested
# Python lists, then attach the result to the summary frame as the 'graph' column.
graph_as_lists = []
for team_name in df_all_data.index:
    image_path = 'hist_images/{}.png'.format(team_name)
    pixel_array = imread(image_path)
    graph_as_lists.append(pixel_array.tolist())

df_all_data['graph'] = graph_as_lists
df_all_data.head()

## Mongo DB

**Please ensure** that your MongoDB server has been started from the terminal before running the cells below.

In [None]:
# Connect to the MongoDB server at 127.0.0.1:27017 through the project's MongoHandler wrapper.
client = MongoHandler("mongodb://127.0.0.1:27017/")

In [None]:
# Obtain the `team_stats_collection` collection in the `team_stats_db` database and display it.
# Presumably it is created if it does not exist yet — confirm in MongoHandler.make_collection.
collection = client.make_collection('team_stats_db', 'team_stats_collection')
collection

In [None]:
# Presumably removes existing documents so that re-running the insert below does not
# create duplicates — confirm in MongoHandler.clear_collection.
client.clear_collection('team_stats_db','team_stats_collection')

In [None]:
# Insert the team summary into MongoDB. MongoHandler.list_of_dicts presumably turns the
# DataFrame into one dict per team — confirm in MongoHandler.
results = collection.insert_many(MongoHandler.list_of_dicts(df_all_data))
# len() returns an int, and str + int raises TypeError, so format the count into the message.
print('Items inserted: {}'.format(len(results.inserted_ids)))

In [None]:
# Read the stored documents back from the collection and print each one to verify the insert.
query = client.query_db('team_stats_db', 'team_stats_collection')
for item in query:
    print(item)