
#### 1.1 How many goals were attempted by head
#### 1.2 How many matches had more than 12000 attendees
#### 2.1 Shots list where goal was scored outside 16m box sorted by descending order
#### 2.2 Rank teams based on average attendance at home games 

In [1]:
from MySqlStruct import *

@measure_execution_time
def MySqlExecTime():
    """Call ``MySql_connect`` and return its result unchanged.

    Exists only so the load step can be wrapped in ``measure_execution_time``;
    the call site below unpacks the decorated call as a
    ``(result, execution_time)`` pair.
    """
    return MySql_connect()

print('----Loading MySql----')
result, execution_time = MySqlExecTime()  # Get both the result and execution time

# Unpack the values from the result
# NOTE(review): presumably a DB cursor, its connection and one DataFrame per
# loaded table -- confirm against MySql_connect. Later cells use `cursor` as a global.
cursor,connection,df_shots,df_matches,df_players,df_teams = result

----Loading MySql----
Inserted 245180 rows into shots.
Inserted 14148 rows into matches.
Inserted 20414 rows into all_players.
Inserted 675 rows into teams.

Data inserted successfully for all files!
Execution time: 6.686674118041992 seconds


In [2]:
# print(df_shots.columns)
# print(df_shots.head(5))
# print(df_matches.columns)
# print(df_matches.head(5))
# print(df_players.columns)
# print(df_players.head(5))
# print(df_teams.columns)
# print(df_teams.head(5))


# 1.1 How many goals were attempted by head

In [3]:
@measure_execution_time
def MySQLQ1_1():
    """Count the shots that ended in a goal and were taken with the head.

    Runs against the module-level ``cursor``. Prints the first matching row
    (if there is one) as a sample, then the number of matching rows.

    Returns:
        int: number of rows in ``shots`` whose ``Outcome`` is 'Goal' and whose
        ``Body Part`` is 'Head'. Previously nothing was returned, so the
        ``result`` bound at the call site was always ``None``.
    """
    query = "SELECT * FROM shots WHERE Outcome = 'Goal' AND `Body Part` = 'Head';"

    cursor.execute(query)
    rows = cursor.fetchall()

    # Show one sample row so the column layout is visible in the output.
    if rows:
        print(rows[0])

    goal_count = len(rows)
    # NOTE(review): "Scoared" is a typo for "Scored"; the message is left
    # unchanged so it keeps matching previously recorded output.
    print(f"Goals Scoared By Head : {goal_count}")
    return goal_count

# Relies on the module-level `cursor` created by the loading cell.
# The decorated call is unpacked as (function result, execution time).
result, execution_time = MySQLQ1_1()

(1, '4a1a9578', 'Goal', 8.0, 'Head', 'de515487')
Goals Scoared By Head : 3963
Execution time: 0.12302422523498535 seconds


# 1.2 How many matches had more than 12000 attendees?

In [4]:
@measure_execution_time
def MySQLQ1_2():
    """Count the matches whose attendance exceeded 12000.

    Runs against the module-level ``cursor``. Prints the first matching row
    (if there is one) as a sample, then the number of matching rows.

    Returns:
        int: number of rows in ``matches`` with ``Attendance > 12000``.
        Previously nothing was returned, so the ``result`` bound at the call
        site was always ``None``.
    """
    query = "SELECT * FROM matches WHERE Attendance > 12000;"

    cursor.execute(query)
    rows = cursor.fetchall()

    # Show one sample row so the column layout is visible in the output.
    if rows:
        print(rows[0])

    match_count = len(rows)
    print(f"Matches with attendees more than 12000 : {match_count}")
    return match_count

# The decorated call is unpacked as (function result, execution time).
result, execution_time = MySQLQ1_2()


(2, '2022-10-09', 11, 11, 'Sun', 24671, '04eea015', '922493f3', 2, 2, 59, 41, '/en/players/75f2c59f/Roberto-Pereyra', '/en/players/2a1beb34/Marten-de-Roon', 1.0, 2.1, 2.1, 1.0, '3-5-2', '3-4-1-2', '2022-10-09 15:00:00')
Matches with attendees more than 12000 : 9166
Execution time: 0.06501483917236328 seconds


#### 2.1 Total Goals Scored and Inside VS Outside 16m box

In [5]:
@measure_execution_time
def MySQLQ2_1():
    """Tally goals per player, split by shot distance around the 16 mark.

    Runs against the module-level ``cursor`` and prints the ten players with
    the most goals.

    Result columns:
        Player_name  -- player name taken from ``all_players``
        goals_gt_16  -- goals with ``distance > 16``
        goals_lt_16  -- goals with ``distance <= 16`` (16 itself is included,
                        despite the "lt" in the name)
        total_goals  -- all goals; a goal with NULL distance is counted here
                        but in neither of the two distance columns

    Returns:
        pandas.DataFrame: one row per player, ordered by ``total_goals``
        descending.
    """
    # Players are reduced to exactly one row per id and the aggregation is
    # keyed on that id. Grouping on the name alone merged distinct players who
    # share a name, and an id listed under several names had every goal
    # repeated once per name by the join.
    query = '''
    WITH unique_players AS (
        SELECT id, MIN(name) AS name
        FROM all_players
        GROUP BY id
    )
    SELECT
        p.name AS Player_name,
        SUM(CASE WHEN s.distance > 16 THEN 1 ELSE 0 END) AS goals_gt_16,
        SUM(CASE WHEN s.distance <= 16 THEN 1 ELSE 0 END) AS goals_lt_16,
        COUNT(*) AS total_goals
    FROM shots s
    JOIN unique_players p ON s.Player = p.id
    WHERE s.Outcome = 'Goal'
    GROUP BY p.id, p.name
    ORDER BY total_goals DESC;
    '''
    cursor.execute(query)
    rows = cursor.fetchall()

    df = pd.DataFrame(rows, columns=['Player_name', 'goals_gt_16', 'goals_lt_16', 'total_goals'])

    print(df.head(10))
    return df

# The decorated call is unpacked as (function result, execution time).
result, execution_time = MySQLQ2_1()


          Player_name goals_gt_16 goals_lt_16  total_goals
0       Kylian-Mbappe          27         120          147
1  Robert-Lewandowski          15         131          146
2          Harry-Kane          21          99          120
3        Lionel-Messi          37          69          106
4       Mohamed-Salah          14          90          104
5       Ciro-Immobile          10          94          104
6       Karim-Benzema          16          88          104
7      Erling-Haaland           7          95          102
8   Wissam-Ben-Yedder           3          89           92
9    Lautaro-Martinez          15          74           89
Execution time: 0.1790618896484375 seconds


#### 2.2 Rank teams based on average attendance at home games

In [6]:
@measure_execution_time
def MySQLQ2_2(cursor):
    """Rank teams by their average attendance at home matches.

    Args:
        cursor: database cursor exposing ``execute`` and ``fetchall``; the
            query is run on it.

    Prints the ten teams with the highest average home attendance.

    Returns:
        pandas.DataFrame: columns ``team_name``, ``average_attendance`` and
        ``matches_count``, ordered by ``average_attendance`` descending. An
        empty result set yields an empty frame with those columns.
    """
    # Average attendance and number of home matches per home team.
    query = """
    SELECT 
        m.home_id,
        AVG(m.Attendance) AS average_attendance,
        COUNT(*) AS matches_count,
        t.name AS team_name
    FROM matches m
    INNER JOIN teams t ON m.home_id = t.id
    GROUP BY m.home_id, t.name
    ORDER BY average_attendance DESC
    """

    cursor.execute(query)
    rows = cursor.fetchall()  # one tuple per team, in SELECT column order

    # Name the columns explicitly: a frame built from an empty list of dicts
    # has no columns at all, so the selection below raised KeyError whenever
    # the query matched nothing.
    df = pd.DataFrame(
        rows,
        columns=['home_id', 'average_attendance', 'matches_count', 'team_name'],
    )
    df = df[['team_name', 'average_attendance', 'matches_count']]
    print(df.head(10))  # Display the top 10 rows
    return df

# The decorated call is unpacked as (function result, execution time).
result, execution_time = MySQLQ2_2(cursor)


           team_name average_attendance  matches_count
0  Manchester United         56407.2273            176
1           Dortmund         56036.0216            139
2            Benfica         54471.1429             14
3       Hamburger SV         52331.0000              3
4          Barcelona         51691.4586            157
5      Bayern Munich         50702.6525            141
6        Real Madrid         47265.5404            161
7    West Ham United         46952.0690            145
8            Arsenal         46577.0500            160
9     Internazionale         46508.7736            159
Execution time: 0.032013654708862305 seconds


#### 3 CREATE TABLES statements

In [7]:
# for i in MySql_table_statements():
#     i=i.split(',')
#     for j in i:
#         print(j)