# Daily Queries 2

In [1]:
import pandas as pd
import psycopg2
from sqlalchemy import create_engine, inspect
from IPython.display import display
from pprint import pprint

<a class="anchor" id="0_toc"></a>
# Table of Contents
***

1. [SQL Engine](#1-engine)
2. [Tables](#2-tables)
3. [Queries](#3-queries)
    1. [What is the position played by the majority of the squad?](#q1)
    2. [Which players played the most games during the entire season?](#q2)
    3. [Out of the players who played less than 1000 minutes in the league who scored the most goals?](#q3)
    4. [Which players played the most minutes in the league? Show top 5.](#q4)
    5. [How many assists were made by the forwards?](#q5)
    6. [Who are the top 3 players with the best goals per minute record?](#q6)
    7. [How many players did not contribute goals or assists in the league?](#q7)
    8. [Who is the player that has the most yellow cards in the league?  
    (If there is a tie then sort by minutes played)](#q8)
    9. [Which defender contribute the most goals and assists in the league?](#q9)
    10. [How many players played more then 50 hours total in the league and in the champions league?](#q10)

<a class="anchor" id="1-engine"></a>
## SQL Engine
***
[back to Table of Contents](#0_toc)

In [2]:
db_name = 'data-analyst-fc-barcelona-20-21-db'

In [3]:
db_config = {'user': 'practicum_student',         # username
             'pwd': 's65BlTKV3faNIGhmvJVzOqhs', # password
             'host': 'rc1b-wcoijxj3yxfsf3fs.mdb.yandexcloud.net',
             'port': 6432,              # connection port
             'db': db_name}          # the name of the database

connection_string = 'postgresql://{}:{}@{}:{}/{}'.format(db_config['user'],
                                                                     db_config['pwd'],
                                                                       db_config['host'],
                                                                       db_config['port'],
                                                                       db_config['db'])

engine = create_engine(connection_string, connect_args={'sslmode':'require'})
inspector = inspect(engine)

In [4]:
def read_schema(table_name):
    return pd.DataFrame(inspector.get_columns(table_name)).rename_axis(table_name, axis=1)

In [5]:
def execute_query(q):
    return pd.io.sql.read_sql(q, con=engine)

<a class="anchor" id="2-tables"></a>
## Inspect tables
***
[back to Table of Contents](#0_toc)

In [6]:
tables = inspector.get_table_names()
tables

['squad',
 'games_played',
 'league_stats',
 'champions_stats',
 'cup_stats',
 'supercup_stats']

In [7]:
for table in tables:
    display(read_schema(table))

squad,name,type,nullable,default,autoincrement,comment
0,player_id,INTEGER,True,,False,
1,first_name,TEXT,True,,False,
2,last_name,TEXT,True,,False,
3,position,TEXT,True,,False,


games_played,name,type,nullable,default,autoincrement,comment
0,player_id,INTEGER,True,,False,
1,league,INTEGER,True,,False,
2,champions,INTEGER,True,,False,
3,cup,INTEGER,True,,False,
4,supercup,INTEGER,True,,False,


league_stats,name,type,nullable,default,autoincrement,comment
0,player_id,INTEGER,True,,False,
1,league_goals,INTEGER,True,,False,
2,league_assists,INTEGER,True,,False,
3,league_yellow,INTEGER,True,,False,
4,league_red,INTEGER,True,,False,
5,league_minutes,INTEGER,True,,False,


champions_stats,name,type,nullable,default,autoincrement,comment
0,player_id,INTEGER,True,,False,
1,champions_goals,INTEGER,True,,False,
2,champions_assists,INTEGER,True,,False,
3,champions_yellow,INTEGER,True,,False,
4,champions_red,INTEGER,True,,False,
5,champions_minutes,INTEGER,True,,False,


cup_stats,name,type,nullable,default,autoincrement,comment
0,player_id,INTEGER,True,,False,
1,cup_goals,INTEGER,True,,False,
2,cup_assists,INTEGER,True,,False,
3,cup_yellow,INTEGER,True,,False,
4,cup_red,INTEGER,True,,False,
5,cup_minutes,INTEGER,True,,False,


supercup_stats,name,type,nullable,default,autoincrement,comment
0,player_id,INTEGER,True,,False,
1,supercup_goals,INTEGER,True,,False,
2,supercup_assists,INTEGER,True,,False,
3,supercup_yellow,INTEGER,True,,False,
4,supercup_red,INTEGER,True,,False,
5,supercup_minutes,INTEGER,True,,False,


In [8]:
for table in tables:
    display(execute_query(f'SELECT * FROM {table} LIMIT 1'))

Unnamed: 0,player_id,first_name,last_name,position
0,1,Marc-André,Ter Stegen,gk


Unnamed: 0,player_id,league,champions,cup,supercup
0,1,31,5,4,2


Unnamed: 0,player_id,league_goals,league_assists,league_yellow,league_red,league_minutes
0,1,0,0,1,0,2790


Unnamed: 0,player_id,champions_goals,champions_assists,champions_yellow,champions_red,champions_minutes
0,1,0,0,0,0,450


Unnamed: 0,player_id,cup_goals,cup_assists,cup_yellow,cup_red,cup_minutes
0,1,0,0,0,0,420


Unnamed: 0,player_id,supercup_goals,supercup_assists,supercup_yellow,supercup_red,supercup_minutes
0,1,0,0,0,0,240


<a class="anchor" id="3-queries"></a>
## Queries
***
[back to Table of Contents](#0_toc)

1. [What is the position played by the majority of the squad?](#q1)
2. [Which players played the most games during the entire season?](#q2)
3. [Out of the players who played less than 1000 minutes in the league who scored the most goals?](#q3)
4. [Which players played the most minutes in the league? Show top 5.](#q4)
5. [How many assists were made by the forwards?](#q5)
6. [Who are the top 3 players with the best goals per minute record?](#q6)
7. [How many players did not contribute goals or assists in the league?](#q7)
8. [Who is the player that has the most yellow cards in the league?  
(If there is a tie then sort by minutes played)](#q8)
9. [Which defender contribute the most goals and assists in the league?](#q9)
10. [How many players played more then 50 hours total in the league and in the champions league?](#q10)

<a class="anchor" id="q1"></a>
### 1. What is the position played by the majority of the squad?
[up](#3-queries)

In [9]:
execute_query("""
SELECT
    position,
    count(position)
FROM
    squad
GROUP BY
    position
ORDER BY
    count DESC
--LIMIT 1
""")

Unnamed: 0,position,count
0,df,9
1,fw,8
2,mf,8
3,gk,4


Defense, with 9 players.

<div class="alert alert-success" role="alert">
  Great!
</div>

<a class="anchor" id="q2"></a>
### 2. Which players played the most games during the entire season?
[up](#3-queries)

In [10]:
top = 5

execute_query(f"""
SELECT
    CONCAT(squad.last_name, ', ', squad.first_name) AS full_name,
    (league + champions + cup + supercup) AS total_games_played
FROM
    games_played
        LEFT JOIN squad ON squad.player_id = games_played.player_id
ORDER BY
    total_games_played DESC
LIMIT {top}
""")

Unnamed: 0,full_name,total_games_played
0,"Lopez, Pedro",52
1,"De Jong, Frenkie",51
2,"Griezmann, Antoine",51
3,"Busquets, Sergio",50
4,"Alba, Jordi",49


<div class="alert alert-success" role="alert">
  Great!
</div>

<a class="anchor" id="q3"></a>
### 3. Out of the players who played less than 1000 minutes in the league, <br />&nbsp;&nbsp;&nbsp;&nbsp;who scored the most goals?
[up](#3-queries)

In [11]:
top = 5

execute_query(f"""
SELECT
    CONCAT(squad.last_name, ', ', squad.first_name) AS full_name,
    league_stats.league_goals
FROM
    league_stats
        LEFT JOIN squad ON squad.player_id = league_stats.player_id
WHERE
    league_stats.league_minutes < 1000
ORDER BY
    league_stats.league_goals DESC
LIMIT {top}
""")

Unnamed: 0,full_name,league_goals
0,"Fati, Ansu",4
1,"Trincão, Francisco",3
2,"Coutinho, Philippe",2
3,"Roberto, Sergi",1
4,"Firpo, Junior",1


Fati, Ansu scored most (4) league goals under 1000 minutes played.

<div class="alert alert-success" role="alert">
  Great!
</div>

<a class="anchor" id="q4"></a>
### 4. Which players played the most minutes in the league? Show top 5.
[up](#3-queries)

In [12]:
top = 5

execute_query(f"""
SELECT
    CONCAT(squad.last_name, ', ', squad.first_name) AS full_name,
    league_stats.league_minutes
FROM
    league_stats
        LEFT JOIN squad ON squad.player_id = league_stats.player_id
ORDER BY
    league_stats.league_minutes DESC
LIMIT {top}
""")

Unnamed: 0,full_name,league_minutes
0,"De Jong, Frenkie",3158
1,"Alba, Jordi",3030
2,"Messi, Lionel",3022
3,"Ter Stegen, Marc-André",2790
4,"Griezmann, Antoine",2619


<div class="alert alert-success" role="alert">
  Great!
</div>

<a class="anchor" id="q5"></a>
### 5. How many assists were made by the forwards?
[up](#3-queries)

In [13]:
execute_query("""
SELECT
    fw_table.full_name,
    (league_assists + champions_assists + cup_assists + supercup_assists) AS total_assists
FROM
    (SELECT
        player_id,
        CONCAT(squad.last_name, ', ', squad.first_name) AS full_name,
        position
    FROM
        squad
    WHERE
        position = 'fw') AS fw_table
    LEFT JOIN league_stats ON league_stats.player_id = fw_table.player_id
    LEFT JOIN champions_stats ON champions_stats.player_id = fw_table.player_id
    LEFT JOIN cup_stats ON cup_stats.player_id = fw_table.player_id
    LEFT JOIN supercup_stats ON supercup_stats.player_id = fw_table.player_id
ORDER BY
    total_assists DESC
""")

Unnamed: 0,full_name,total_assists
0,"Messi, Lionel",14
1,"Griezmann, Antoine",13
2,"Dembélé, Ousmane",5
3,"Fati, Ansu",4
4,"Braithwaite, Martin",4
5,"Trincão, Francisco",2
6,"De La Fuente, Konard",0
7,"Collado, Álex",0


<div class="alert alert-success" role="alert">
  Great!
</div>

<a class="anchor" id="q6"></a>
### 6. Who are the top 3 players with the best goals per minute record?
[up](#3-queries)

In [14]:
top = 3

execute_query(f"""
SELECT
    *,
    (CASE WHEN total_minutes <> 0 
          THEN total_goals / total_minutes::float
          ELSE 0 END) AS goals_minutes_ratio
FROM
    (SELECT
        fw_table.full_name,
        (league_goals + champions_goals + cup_goals + supercup_goals) AS total_goals,
        (league_minutes + champions_minutes + cup_minutes + supercup_minutes) AS total_minutes
    FROM
        (SELECT
            player_id,
            CONCAT(squad.last_name, ', ', squad.first_name) AS full_name,
            position
        FROM
            squad
        WHERE
            position = 'fw'
        ) AS fw_table
            LEFT JOIN league_stats ON league_stats.player_id = fw_table.player_id
            LEFT JOIN champions_stats ON champions_stats.player_id = fw_table.player_id
            LEFT JOIN cup_stats ON cup_stats.player_id = fw_table.player_id
            LEFT JOIN supercup_stats ON supercup_stats.player_id = fw_table.player_id
    ) AS subq
ORDER BY
    goals_minutes_ratio DESC
LIMIT {top}
""")

Unnamed: 0,full_name,total_goals,total_minutes,goals_minutes_ratio
0,"Messi, Lionel",38,4192,0.009065
1,"Fati, Ansu",5,596,0.008389
2,"Griezmann, Antoine",20,3903,0.005124


<div class="alert alert-success" role="alert">
  Great! - but the meaning was if to get minutes it's take the player to score you only need to change your calculation to total_minutes / total goals😊
<br>If you want here is a nice solution:    
</div>

In [16]:
query = '''
         SELECT 
            s.first_name, 
            s.last_name,
            ((scup.supercup_minutes + cup.cup_minutes + cs.champions_minutes + ls.league_minutes) /
            (scup.supercup_goals + cup.cup_goals + cs.champions_goals + ls.league_goals)) AS minutes_per_goal
        FROM
            squad s
        JOIN supercup_stats scup ON s.player_id = scup.player_id
        JOIN cup_stats cup ON s.player_id = cup.player_id
        JOIN champions_stats cs ON s.player_id = cs.player_id
        JOIN league_stats ls ON s.player_id = ls.player_id
        WHERE (scup.supercup_goals + cup.cup_goals + cs.champions_goals + ls.league_goals) > 0
        ORDER BY
            minutes_per_goal
        LIMIT 3;
'''
execute_query(query)

Unnamed: 0,first_name,last_name,minutes_per_goal
0,Lionel,Messi,110
1,Ansu,Fati,119
2,Antoine,Griezmann,195


<a class="anchor" id="q7"></a>
### 7. How many players did not contribute goals or assists in the league?
[up](#3-queries)

In [17]:
execute_query("""
SELECT
    COUNT(1)
FROM
    league_stats
WHERE
    league_goals = 0 OR league_assists = 0
""")

Unnamed: 0,count
0,13


<div class="alert alert-success" role="alert">
  Great! - but this question meant not contribute both goals and assists but it's just a small change from OR to AND👍
</div>

<a class="anchor" id="q8"></a>
### 8. Who is the player that has the most yellow cards in the league? <br />&nbsp;&nbsp;&nbsp;&nbsp;(If there is a tie then sort by minutes played)
[up](#3-queries)

In [18]:
top = 5

execute_query(f"""
SELECT
    CONCAT(squad.last_name, ', ', squad.first_name) AS full_name,
    league_stats.league_yellow,
    league_stats.league_minutes
FROM
    league_stats
        LEFT JOIN squad ON squad.player_id = league_stats.player_id
ORDER BY
    league_yellow DESC,
    league_minutes
LIMIT {top}
""")

Unnamed: 0,full_name,league_yellow,league_minutes
0,"Busquets, Sergio",9,2527
1,"Alba, Jordi",9,3030
2,"Lenglet, Clément",7,2476
3,"De Jong, Frenkie",5,3158
4,"Mingueza, Óscar",4,1901


Busquets, Sergio has most (9) league yellow cards in least amount of minutes played (2527).

<div class="alert alert-success" role="alert">
  Great!
</div>

<a class="anchor" id="q9"></a>
### 9. Which defender contribute the most goals and assists in the league?
[up](#3-queries)

In [19]:
top = 5

execute_query(f"""
SELECT
    full_name,
    league_goals,
    league_assists,
    league_goals + league_assists AS total_contributions
FROM
    (SELECT
        player_id,
        CONCAT(last_name, ', ', first_name) AS full_name
    FROM
        squad
    WHERE
        position = 'df'
    ) as df_table
        LEFT JOIN league_stats ON league_stats.player_id = df_table.player_id
ORDER BY
    total_contributions DESC
--LIMIT {top}
""")

Unnamed: 0,full_name,league_goals,league_assists,total_contributions
0,"Alba, Jordi",3,7,10
1,"Mingueza, Óscar",2,2,4
2,"Dest, Sergiño",2,1,3
3,"Roberto, Sergi",1,2,3
4,"Araújo, Ronald",2,1,3
5,"Firpo, Junior",1,1,2
6,"Lenglet, Clément",1,0,1
7,"Umtiti, Samuel",0,0,0
8,"Piqué, Gerard",0,0,0


Alba, Jordi contributed mostly (10) as a league defender, with 3 goals and 7 assists.

<div class="alert alert-success" role="alert">
  Great!
</div>

<a class="anchor" id="q10"></a>
### 10. How many players played more then 50 hours total in the league and in the champions league?
[up](#3-queries)

In [20]:
execute_query("""
SELECT
    CONCAT(last_name, ', ', first_name) AS full_name,
    ROUND((league_minutes + champions_minutes) / 60.0, 2) AS league_champions_hours
FROM
    squad
        LEFT JOIN league_stats ON league_stats.player_id = squad.player_id
        LEFT JOIN champions_stats ON champions_stats.player_id = squad.player_id
WHERE
    (league_minutes + champions_minutes) / 60 > 50
ORDER BY
    league_champions_hours DESC
""")

Unnamed: 0,full_name,league_champions_hours
0,"De Jong, Frenkie",62.37
1,"Messi, Lionel",59.37
2,"Alba, Jordi",58.57
3,"Ter Stegen, Marc-André",54.0
4,"Griezmann, Antoine",52.03


<div class="alert alert-warning" role="alert">
Make sure to use real type or 60.0 in the WHERE block as well you seem to forget a player
</div>

5 players have played more than 50 hours in the league and champions combined.

In [21]:
execute_query("""
SELECT
    CONCAT(last_name, ', ', first_name) AS full_name,
    ROUND((league_minutes + champions_minutes) / 60.0, 2) AS league_champions_hours
FROM
    squad
        LEFT JOIN league_stats ON league_stats.player_id = squad.player_id
        LEFT JOIN champions_stats ON champions_stats.player_id = squad.player_id
WHERE
    (league_minutes + champions_minutes) / 60.0 > 50
ORDER BY
    league_champions_hours DESC
""")

Unnamed: 0,full_name,league_champions_hours
0,"De Jong, Frenkie",62.37
1,"Messi, Lionel",59.37
2,"Alba, Jordi",58.57
3,"Ter Stegen, Marc-André",54.0
4,"Griezmann, Antoine",52.03
5,"Lenglet, Clément",50.63
