In [65]:
#basic class
import datetime
from datetime import date
from dateutil.parser import parse
from dateutil import tz
import json
import requests
import pandas as pd
class RainData:
    """Daily rain indicator for a season, fetched from the Dark Sky API.

    The location is hard-coded to latitude 52.5200, longitude 13.4050 and
    every day is sampled at 15:00 (no UTC offset is sent with the request).

    Attributes:
        start_date: parsed first day of the season (inclusive).
        end_date: parsed last day of the season (exclusive, see ``dates``).
        date_range: number of whole days between the two dates.
        dates: ``YYYY-MM-DD`` strings for ``start_date`` up to, but not
            including, ``end_date``.
        raindf: the last frame returned by ``get_rain_df`` (set on first call).
    """

    def __init__(self, season_start_date, season_end_date):
        """Parse the season boundaries and enumerate the days in between.

        Args:
            season_start_date: date string understood by ``dateutil.parser.parse``.
            season_end_date: date string understood by ``dateutil.parser.parse``.
        """
        self.start_date = parse(season_start_date)
        self.end_date = parse(season_end_date)
        self.date_range = (self.end_date - self.start_date).days
        # range() stops before date_range, so the end date itself is not included.
        self.dates = [
            (self.start_date + datetime.timedelta(days=offset)).strftime('%Y-%m-%d')
            for offset in range(self.date_range)
        ]

    def get_rain_df(self, api_key, timeout=30):
        """Download one daily summary per date and return a Date/Rain frame.

        Args:
            api_key: Dark Sky API key, inserted into the request URL.
            timeout: seconds to wait for each HTTP response before giving up.

        Returns:
            DataFrame with columns ``Date`` (string) and ``Rain`` (1 when the
            day's ``precipType`` is ``'rain'``, otherwise 0). The frame is also
            stored on ``self.raindf``.

        Raises:
            requests.HTTPError: when the API answers with an error status.
            KeyError: when a successful response carries no ``daily`` data.
        """
        # NOTE(review): the Dark Sky API is believed to have been retired;
        # confirm the endpoint still responds before relying on this.
        daily_data = []
        for day in self.dates:
            url = ("https://api.darksky.net/forecast/" + str(api_key)
                   + "/52.5200,13.4050," + str(day)
                   + "T15:00:00?exclude=currently,hourly,flags")
            response = requests.get(url, timeout=timeout)
            # Fail loudly on HTTP errors instead of a later KeyError on 'daily'.
            response.raise_for_status()
            daily_data.append(response.json()['daily']['data'][0])
        raindf = self._build_rain_df(self.dates, daily_data)
        self.raindf = raindf
        return raindf

    @staticmethod
    def _build_rain_df(dates, daily_data):
        """Turn per-day summary dicts into the Date/Rain indicator frame."""
        # Missing precipType, 'sleet' and 'snow' all count as "no rain".
        rain_flags = [int(day.get('precipType') == 'rain') for day in daily_data]
        return pd.DataFrame(list(zip(dates, rain_flags)), columns=['Date', 'Rain'])

In [32]:
# Display the rain table as the cell output.
# NOTE(review): no visible cell assigns a top-level `raindf`; presumably it holds
# the result of RainData(...).get_rain_df(...) from an earlier session — confirm,
# otherwise this cell fails on a fresh kernel.
raindf

Unnamed: 0,Date,Rain
0,2011-11-15,0
1,2011-11-16,0
2,2011-11-17,0
3,2011-11-18,1
4,2011-11-19,0


In [102]:
# Scratch check of the geocoder: resolve a place name and inspect its latitude.
from geopy.geocoders import Nominatim
# Nominatim client identified by a custom user-agent string.
geolocator = Nominatim(user_agent="Je/Remy")
# Look up "Baghdad", asking for the result in US English (performs a network request).
location = geolocator.geocode("Baghdad", language='en-US')
# Last expression of the cell, so the notebook displays the latitude.
location.latitude

33.3024309

In [143]:
#advanced
!pip timezonefinder
!pip install geopy
import pytz
import datetime
from datetime import date
from dateutil.parser import parse
from dateutil import tz
import json
import requests
import pandas as pd
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="Je/Remy")
from timezonefinder import TimezoneFinder
tf = TimezoneFinder(in_memory=True)
TimezoneFinder.using_numba()

    
class RainData:
    """Build Dark Sky request URLs for each day of a season at a named place.

    The place name is geocoded with the module-level ``geolocator`` and its
    timezone is looked up with the module-level ``tf``; each day is then paired
    with a local time and the matching UTC offset.

    Attributes:
        location: geocoding result for the requested place.
        timezone: timezone name found for the location's coordinates.
        start_date: parsed first day of the season (inclusive).
        end_date: parsed last day of the season (exclusive, see ``dates``).
        date_range: number of whole days between the two dates.
        dates: ``YYYY-MM-DD`` strings from ``start_date`` up to, but not
            including, ``end_date``.
        datetimes: ``YYYY-MM-DDTHH:MM:00+HH:MM`` strings, one per date.
    """

    def __init__(self, season_start_date, season_end_date, location, time_24hours ='15:00'):
        """Resolve the location and precompute the per-day timestamps.

        Args:
            season_start_date: date string understood by ``dateutil.parser.parse``.
            season_end_date: date string understood by ``dateutil.parser.parse``.
            location: place name passed to the geocoder.
            time_24hours: local time of day as ``HH:MM``.

        Raises:
            ValueError: when the place cannot be geocoded or no timezone is
                found for its coordinates.
        """
        self.location = geolocator.geocode(location, language='en-US')
        if self.location is None:
            raise ValueError('Could not geocode location: {!r}'.format(location))
        self.timezone = tf.timezone_at(lng = self.location.longitude, lat = self.location.latitude)
        if self.timezone is None:
            raise ValueError('No timezone found for location: {!r}'.format(location))
        self.start_date = parse(season_start_date)
        self.end_date = parse(season_end_date)
        self.date_range = (self.end_date - self.start_date).days
        # range() stops before date_range, so the end date itself is not included.
        self.dates = [
            (self.start_date + datetime.timedelta(days=offset)).strftime('%Y-%m-%d')
            for offset in range(self.date_range)
        ]
        zone = pytz.timezone(self.timezone)
        self.datetimes = []
        for day in self.dates:
            # Localize the actual requested time, not midnight: on a DST
            # transition day the offset at 00:00 differs from the one later on.
            local_moment = zone.localize(parse(day + ' ' + time_24hours))
            offset = local_moment.strftime('%z')  # e.g. '+0100'
            self.datetimes.append(
                day + 'T' + time_24hours + ':00' + offset[:3] + ':' + offset[3:]
            )

    def get_rain_df(self, api_key):
        """Return the list of request URLs, one per entry in ``self.datetimes``.

        NOTE(review): despite its name this method currently returns URL
        strings, not a DataFrame; the download/parsing steps are still
        commented out below.

        Args:
            api_key: Dark Sky API key, inserted into each URL.

        Returns:
            list of URL strings.
        """
        coordinates = str(self.location.latitude) + ',' + str(self.location.longitude)
        urls = [
            "https://api.darksky.net/forecast/" + str(api_key) + "/" + coordinates + ','
            + moment + "?exclude=currently,hourly,flags"
            for moment in self.datetimes
        ]
        return urls
        # TODO: re-enable once the URLs are verified (note: use self.dates, not dates).
        #retrieve data and unstring
#         data = []
#         for i in urls:
#             data.append(json.loads(requests.get(i).text))
#         #unnest dictionaries
#         daily_data = [data[i]['daily']['data'][0] for i in range(len(data))]
#         precipType = []
#         for i in range(len(daily_data)):
#             if 'precipType' in daily_data[i].keys():
#                 precipType.append(daily_data[i]['precipType'])
#             else:
#                 precipType.append('0')
#         raindf = pd.DataFrame(list(zip(self.dates, precipType)), columns =['Date', 'Rain'])
#         raindf['Rain']= raindf['Rain'].map({'0':0, 'rain':1, 'sleet':0, 'snow':0})
#         return raindf 
    

In [41]:
# Load the 2011 season matches of divisions D1 and D2, ordered by date.
# NOTE(review): the cursor `c` is not created in any visible cell — confirm it
# is opened earlier in the notebook.
c.execute("""SELECT *
             FROM Matches
             WHERE Season IN (2011) AND Div IN ('D1','D2') 
             ORDER BY Date
             """)

# Pass the column names to the constructor so an empty result set still yields
# a frame with the right columns instead of a length-mismatch error.
df = pd.DataFrame(c.fetchall(), columns=[x[0] for x in c.description])
df.head()

Unnamed: 0,Match_ID,Div,Season,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR
0,1133,D2,2011,2011-07-15,Cottbus,Dresden,2,1,H
1,1167,D2,2011,2011-07-15,Greuther Furth,Ein Frankfurt,2,3,A
2,1551,D2,2011,2011-07-15,Frankfurt FSV,Union Berlin,1,1,D
3,1550,D2,2011,2011-07-16,Erzgebirge Aue,Aachen,1,0,H
4,1678,D2,2011,2011-07-16,St Pauli,Ingolstadt,2,0,H


In [61]:
import sqlite3
class win_loss_record:
    """Per-team aggregates computed from a frame of match results.

    The frame is expected to provide the columns used below: ``Date``,
    ``HomeTeam``, ``AwayTeam``, ``FTHG``, ``FTAG`` and ``FTR`` (where ``FTR``
    is ``'H'``, ``'A'`` or ``'D'``).
    """

    def __init__(self, df):
        """Keep a reference to the match frame (it is modified by other methods)."""
        self.df = df

    def _games_played(self):
        """Return the number of matches per team (home plus away appearances)."""
        home_games = self.df.groupby('HomeTeam').size()
        away_games = self.df.groupby('AwayTeam').size()
        return home_games.add(away_games, fill_value=0)

    def goals_scored_df(self):
        """Return a Series of total goals scored per team, indexed by ``Team``."""
        home_goals = self.df.groupby('HomeTeam').FTHG.sum()
        away_goals = self.df.groupby('AwayTeam').FTAG.sum()
        # fill_value=0 keeps teams that appear only at home or only away.
        goals_scored_df = home_goals.add(away_goals, fill_value=0)
        return goals_scored_df.rename_axis('Team')

    def team_records(self):
        """Return a frame of ``Wins``, ``Losses`` and ``Draws`` per team.

        Side effect: adds the indicator columns ``HomeWin``, ``AwayWin``,
        ``HomeLoss`` and ``AwayLoss`` to ``self.df``.
        """
        self.df['HomeWin'] = self.df.FTR.map({'H':1, 'A':0, 'D':0})
        self.df['AwayWin'] = self.df.FTR.map({'H':0, 'A':1, 'D':0})
        self.df['HomeLoss'] = self.df.FTR.map({'H':0, 'A':1, 'D':0})
        self.df['AwayLoss'] = self.df.FTR.map({'H':1, 'A':0, 'D':0})
        by_home = self.df.groupby('HomeTeam')
        by_away = self.df.groupby('AwayTeam')
        teams = pd.DataFrame({
            'Wins': by_home.HomeWin.sum().add(by_away.AwayWin.sum(), fill_value=0),
            'Losses': by_home.HomeLoss.sum().add(by_away.AwayLoss.sum(), fill_value=0),
        })
        # Draws are whatever remains of the games actually present in the data
        # (equals 34 - wins - losses for a complete 34-game season).
        teams['Draws'] = self._games_played() - teams.Wins - teams.Losses
        return teams.rename_axis('Team')

    def team_records_graphs(self):
        """Save one bar chart per team and return the records with a ``graph`` column.

        Each chart is written to ``hist_images/<team>.png`` and read back as a
        nested list stored in the ``graph`` column.

        NOTE(review): ``plt`` and ``imread`` are not imported in any visible
        cell — confirm they are provided elsewhere in the notebook.
        """
        import os

        teams = self.team_records()
        bar_x = ['Wins', 'Losses', 'Draws']
        # savefig does not create missing directories.
        os.makedirs('hist_images', exist_ok=True)
        for position in range(len(teams)):
            team_name = teams.index[position]
            bar_y = list(teams[bar_x].iloc[position])
            plt.bar(x=bar_x, height=bar_y)
            plt.title(team_name)
            plt.savefig('hist_images/{}.png'.format(team_name))
            plt.clf()
        teams['graph'] = [imread('hist_images/{}.png'.format(team_name)).tolist() for team_name in teams.index]
        return teams

    def rain_results(self, raindf):
        """Return team records extended with rain-versus-dry win statistics.

        Args:
            raindf: frame with a ``Date`` column matching ``self.df.Date`` and
                a ``Rain`` column holding 1 for rainy days and 0 otherwise.

        Returns:
            The ``team_records`` frame plus ``RainGames``, ``RainWins``,
            ``NonRainWins``, ``RainWin%``, ``NonRainWin%`` and
            ``%ChangeWinWithRain``.

        Side effect: ``self.df`` is replaced by the matches merged with
        ``raindf``.
        """
        # Drop a Rain column left by a previous call so that re-running does not
        # produce Rain_x / Rain_y columns.
        matches = self.df.drop(columns=['Rain'], errors='ignore')
        self.df = matches.merge(raindf, on='Date', how='left')
        teams = self.team_records()
        games = self._games_played()
        by_home = self.df.groupby('HomeTeam')
        by_away = self.df.groupby('AwayTeam')
        teams['RainGames'] = by_home.Rain.sum().add(by_away.Rain.sum(), fill_value=0)
        rainy = self.df[self.df.Rain == 1]
        rain_wins = rainy.groupby('HomeTeam').HomeWin.sum().add(
            rainy.groupby('AwayTeam').AwayWin.sum(), fill_value=0)
        # Teams without any rainy match get 0 wins rather than NaN.
        teams['RainWins'] = rain_wins.reindex(teams.index, fill_value=0)
        teams['NonRainWins'] = teams.Wins - teams.RainWins
        teams['RainWin%'] = teams.RainWins/teams.RainGames
        teams['NonRainWin%'] = teams.NonRainWins/(games-teams.RainGames)
        teams['%ChangeWinWithRain'] = (teams['RainWin%']-teams['NonRainWin%'])/teams['NonRainWin%']
        return teams
    
    


In [None]:
def rain_results(self, raindf):
    """Stand-alone draft of the rain statistics computed per team.

    Merges ``raindf`` into ``self.df`` on ``Date`` and derives win/loss/draw
    counts plus rain-versus-dry win rates for every team.

    Args:
        self: any object exposing a match frame as ``self.df`` with the columns
            ``Date``, ``HomeTeam``, ``AwayTeam`` and ``FTR``.
        raindf: frame with a ``Date`` column and a ``Rain`` column holding 1 for
            rainy days and 0 otherwise.

    Returns:
        Frame indexed by ``Team`` with ``Wins``, ``Losses``, ``Draws``,
        ``RainGames``, ``RainWins``, ``NonRainWins``, ``RainWin%``,
        ``NonRainWin%`` and ``%ChangeWinWithRain``.
    """
    # Drop a Rain column from an earlier merge so re-running stays idempotent.
    matches = self.df.drop(columns=['Rain'], errors='ignore').merge(raindf, on='Date', how='left')
    # Derive the indicators here rather than relying on them already existing.
    matches['HomeWin'] = matches.FTR.map({'H':1, 'A':0, 'D':0})
    matches['AwayWin'] = matches.FTR.map({'H':0, 'A':1, 'D':0})
    matches['HomeLoss'] = matches.FTR.map({'H':0, 'A':1, 'D':0})
    matches['AwayLoss'] = matches.FTR.map({'H':1, 'A':0, 'D':0})
    self.df = matches

    # Aggregate the merged frame (the original read the unrelated global `df`).
    by_home = matches.groupby('HomeTeam')
    by_away = matches.groupby('AwayTeam')
    games = by_home.size().add(by_away.size(), fill_value=0)
    teams = pd.DataFrame({
        'Wins': by_home.HomeWin.sum().add(by_away.AwayWin.sum(), fill_value=0),
        'Losses': by_home.HomeLoss.sum().add(by_away.AwayLoss.sum(), fill_value=0),
    })
    # Games are counted from the data (34 per team for a complete season).
    teams['Draws'] = games - teams.Wins - teams.Losses
    teams['RainGames'] = by_home.Rain.sum().add(by_away.Rain.sum(), fill_value=0)
    rainy = matches[matches.Rain == 1]
    rain_wins = rainy.groupby('HomeTeam').HomeWin.sum().add(
        rainy.groupby('AwayTeam').AwayWin.sum(), fill_value=0)
    # Teams without any rainy match get 0 wins rather than NaN.
    teams['RainWins'] = rain_wins.reindex(teams.index, fill_value=0)
    teams['NonRainWins'] = teams.Wins - teams.RainWins
    teams['RainWin%'] = teams.RainWins/teams.RainGames
    teams['NonRainWin%'] = teams.NonRainWins/(games-teams.RainGames)
    teams['%ChangeWinWithRain'] = (teams['RainWin%']-teams['NonRainWin%'])/teams['NonRainWin%']
    teams.index.rename('Team', inplace=True)
    return teams

In [None]:
# NOTE(review): `coordinates` is not defined in any visible cell and this cell
# has never been executed; presumably a leftover scratch expression — confirm
# or remove.
coordinates 