In [1]:
import pandas as pd

df = pd.read_csv("../Data/athlete_events.csv")

sports_both_season = df.groupby("Sport")["Season"].nunique().reset_index().query("Season == 2")["Sport"].tolist()

print(f"Sports in both seasons {sports_both_season}")


filtered_df = df[df["Sport"].isin(sports_both_season)]

years_in_both = filtered_df.groupby("Sport")["Year"].unique().reset_index()
print(years_in_both)


Sports in both seasons ['Alpinism', 'Figure Skating', 'Ice Hockey']
            Sport                                               Year
0        Alpinism                                 [1924, 1936, 1932]
1  Figure Skating  [1964, 1968, 2010, 2014, 1998, 1994, 2002, 194...
2      Ice Hockey  [2002, 2014, 1924, 1952, 1980, 1984, 1972, 192...


In [2]:
medals_won_hungary = df[
    (df["Sport"].isin(sports_both_season)) & (df["NOC"] == "HUN")
]

total_medals = medals_won_hungary.groupby("Year")[["Sport","Medal","Sex","Season"]].value_counts()

print("Total medals Hungary won in sports featured in both Olympics:", total_medals)

Total medals Hungary won in sports featured in both Olympics: Year  Sport           Medal   Sex  Season
1932  Figure Skating  Bronze  F    Winter    1
                              M    Winter    1
1936  Figure Skating  Bronze  F    Winter    1
                              M    Winter    1
1948  Figure Skating  Silver  F    Winter    1
                              M    Winter    1
1952  Figure Skating  Bronze  F    Winter    1
                              M    Winter    1
1956  Figure Skating  Bronze  F    Winter    1
                              M    Winter    1
1980  Figure Skating  Silver  F    Winter    1
                              M    Winter    1
Name: count, dtype: int64


In [3]:
# Hungarys best wintersport

hungary = df[(df["NOC"] == "HUN") & (df["Season"] == "Winter")]

sport_medal_counts = (hungary.groupby("Sport")["Medal"].count().sort_values(ascending=False))
number_of_medals = sport_medal_counts.sum()

best_sport = sport_medal_counts.idxmax()
print(f"Hungary's best winter sport is {best_sport} with {number_of_medals} medals.")

Hungary's best winter sport is Figure Skating with 12 medals.


In [4]:
# Hungarys best summersport

hungary = df[(df["NOC"] == "HUN") & (df["Season"] == "Summer")]
sport_medal_counts = (hungary.groupby("Sport")["Medal"].count().sort_values(ascending=False))
number_of_medals = sport_medal_counts.sum()

best_sport = sport_medal_counts.idxmax()
print(f"Hungary's best summer sport is {best_sport} with {number_of_medals} medals.")

Hungary's best summer sport is Fencing with 1123 medals.


In [26]:
# Oldest and youngest medal winner, any medal
oldest_winner = hungary[hungary["Medal"].notna()].sort_values("Age", ascending=False).iloc[0]
youngest_winner = hungary[hungary["Medal"].notna()].sort_values("Age").iloc[0]

print(f"Oldest winner for Hungary: {oldest_winner}")
print(f"Youngest winner for Hungary: {youngest_winner}")


Oldest winner for Hungary: ID                                              74532
Name                                  Miltiades Manno
Sex                                                 M
Age                                              53.0
Height                                            NaN
Weight                                           76.0
Team                                          Hungary
NOC                                               HUN
Games                                     1932 Summer
Year                                             1932
Season                                         Summer
City                                      Los Angeles
Sport                                Art Competitions
Event     Art Competitions Mixed Sculpturing, Statues
Medal                                          Silver
Name: 148575, dtype: object
Youngest winner for Hungary: ID                                     78100
Name                     Krisztina Medveczky
Sex             

In [None]:
# Best and worst year
medals_per_year = hungary.dropna(subset=['Medal']).groupby('Year')['Medal'].count()
best_year = medals_per_year.idxmax()
worst_year = medals_per_year.idxmin()

print(f"Best Year: {best_year} with {medals_per_year.max()} medals")
print(f"Worst Year: {worst_year} with {medals_per_year.min()} medals")

Best Year: 1952 with 102 medals
Worst Year: 1904 with 4 medals


In [None]:
# Atheletes competing in multiple sports
multi_athletes = hungary.groupby("Name")["Sport"].nunique()

print(multi_athletes[multi_athletes > 1])

Name
Alfrd (Arnold-) Hajs (Guttmann-)    2
Andrs Baronyi                       2
Andrs Bodnr                         2
Andrs Han                           2
Attila Horvth                       2
                                   ..
Zoltn Horvth                        2
Zoltn Kovcs                         2
Zoltn Molnr                         2
Zsfia Kovcs                         2
Zsolt Nmeth                         3
Name: Sport, Length: 84, dtype: int64
