# Import Required Libraries
Import the necessary libraries, including geopandas, matplotlib, and pandas.

In [None]:
# Import Required Libraries
import geopandas as gpd  # type: ignore
import matplotlib.pyplot as plt  # type: ignore
import pandas as pd  # type: ignore
import config
import os
import tempfile
import zipfile

# Load the active members data

In [None]:
# Load the active-members table from the CSV file whose path is configured as
# config.ACTIVE_MEMBERS_EXTRA_INTERIM.
active_members_extra_gender_df = pd.read_csv(
    filepath_or_buffer=config.ACTIVE_MEMBERS_EXTRA_INTERIM,
)
# Uncomment to preview the first rows:
# active_members_extra_gender_df.head()

# Group by Canadian Province and City, and Count Members
- Filter the active members to those whose country is Canada.
- Count the Canadian members per province (`STATE`).
- Count the Canadian members per city (`CITY`).

In [None]:
# Count Canadian members by province and by city.
#
# The Canada filter is computed once and reused for both counts so the two
# tables are guaranteed to be built from the same set of rows.
is_canadian = active_members_extra_gender_df["COUNTRY"] == "Canada"
canadian_members_df = active_members_extra_gender_df.loc[is_canadian]

# Count by province.
# NOTE: with pandas' default value_counts() settings the result is sorted by
# count (largest first) and rows with a missing COUNTRY/STATE are left out.
province_distribution = (
    canadian_members_df[["COUNTRY", "STATE"]]
    .value_counts()
    .reset_index(name="COUNT")  # columns: COUNTRY, STATE, COUNT
)
print(province_distribution.head())

# Count by city (same approach; rows with a missing CITY are left out).
canadian_city_distribution = (
    canadian_members_df[["COUNTRY", "CITY"]]
    .value_counts()
    .reset_index(name="COUNT")  # columns: COUNTRY, CITY, COUNT
)
print(canadian_city_distribution.head())

# Read the Shapefile from Each Zip File
Extract the shapefile from each zip archive and load it into a GeoDataFrame.

In [None]:
def _read_shapefile_from_zip(zip_path, shapefile_name):
    """Extract a zipped shapefile and load it into a GeoDataFrame.

    Parameters
    ----------
    zip_path : str or path-like
        Location of the zip archive that contains the shapefile.
    shapefile_name : str
        File name of the ``.shp`` member inside the archive.

    Returns
    -------
    geopandas.GeoDataFrame
        The contents of the shapefile, as returned by ``gpd.read_file``.
    """
    with zipfile.ZipFile(zip_path, "r") as zip_ref, tempfile.TemporaryDirectory() as tmpdirname:
        # Extract the whole archive so the .shp is read alongside the other
        # files shipped with it.
        zip_ref.extractall(tmpdirname)
        # Read while still inside the `with` block: the temporary directory
        # is removed as soon as the block exits.
        return gpd.read_file(os.path.join(tmpdirname, shapefile_name))


# Load both layers with the same helper instead of two copy-pasted stanzas.
states_provinces_df = _read_shapefile_from_zip(
    config.NE_50M_ADMIN_1_STATES_PROVINCES,
    "ne_50m_admin_1_states_provinces.shp",
)
populated_df = _read_shapefile_from_zip(
    config.NE_50M_POPULATED_PLACES,
    "ne_50m_populated_places.shp",
)

# states_provinces_df.head()
populated_df.head()

# Test Plot of the maps

In [None]:
# Keep only the provinces whose ISO 3166-1 alpha-2 country code is Canada's.
canadian_provinces_df = states_provinces_df[states_provinces_df["iso_a2"] == "CA"]

# Keep only the populated places whose ADM0_A3 country code is Canada's.
canadian_cities_df = populated_df[populated_df["ADM0_A3"] == "CAN"]

# Base layer: Canadian provinces in Canadian red. GeoDataFrame.plot returns the
# Axes it drew on; every later layer is added to that same Axes explicitly.
ax = canadian_provinces_df.plot(figsize=(10, 10), color=config.CANADA_RED, edgecolor="black")

# Overlay the cities as large BMW-blue markers.
canadian_cities_df.plot(ax=ax, color=config.BMW_BLUE, markersize=100, edgecolor="black")

# Label each city at its point location. Drawing through `ax` (rather than
# `plt.text`) keeps the labels on this figure regardless of which Axes pyplot
# currently considers active.
city_points = canadian_cities_df.geometry
for x, y, city_name in zip(city_points.x, city_points.y, canadian_cities_df["NAME"]):
    ax.text(x, y, city_name, fontsize=8, ha="right", color="black")

# Give the figure a title so it stands alone when the notebook is skimmed.
ax.set_title("Canadian Provinces and Populated Places")

plt.show()

# Merge Country Distribution with World Map
Merge the country distribution DataFrame with the world map GeoDataFrame.

In [None]:
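# Merge the filtered country distribution with the world map
# NOTE: this cell is disabled. `world_df` is not defined in the cells above, and
# `canadian_city_distribution` has upper-case columns (COUNTRY, CITY, COUNT), so the
# lower-case merge keys below ('country', 'city') must be updated before re-enabling.
# merged_world_df = world_df.merge(canadian_city_distribution, how='left', left_on=['NAME', 'city'], right_on=['country', 'city'])
# merged_world_df.head()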

# Plot the World Map
Plot the world map, showing the countries with counted members, and save the plot.

In [None]:
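# NOTE: this cell is disabled. It relies on `world_df` and `merged_world_df`, which
# are only created by the (also commented-out) merge cell above.
# # Plot the Filtered Map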

# # Render Canada in red, the USA in blue, and other countries in grey
# colors = {'Canada': config.CANADA_RED, 'United States of America': config.USA_BLUE}
# world_df['color'] = world_df['NAME'].apply(lambda x: colors.get(x, 'grey'))

# # Plot the map with the specified colors
# world_df[world_df['CONTINENT'] == 'North America'].plot(color=world_df[world_df['CONTINENT'] == 'North America']['color'])

# # Center the continent in the window
# plt.xlim(-170, -30)
# plt.ylim(0, 80)

# # Plot circles over each country where count is not missing with a number in the middle of the circle
# for idx, row in merged_world_df.dropna(subset=['count']).iterrows():
#     if row['country'] == 'Canada':
#         x_offset, y_offset = -10, -4
#     elif row['country'] == 'United States of America':
#         x_offset, y_offset = 12, -6
#     else:
#         x_offset, y_offset = 0, 0

#     if row['country'] == 'Canada':
#         size = row['count'] * 3  # Adjust the multiplier for Canada
#     else:
#         size = row['count'] * 10  # Adjust the multiplier for other countries
#     plt.scatter(row['geometry'].centroid.x + x_offset, row['geometry'].centroid.y + y_offset, s=size, color='white', edgecolor='none', alpha=1.0)
#     plt.text(row['geometry'].centroid.x + x_offset, row['geometry'].centroid.y + y_offset, str(int(row['count'])),
#              ha='center', va='center', fontsize=8, color='black')

# # Add a title and show the plot
# plt.title('Active Club Members by Country', pad=10)

# # Remove the axes
# plt.axis('off')

# # Save and show the plot
# plt.savefig(config.CLUB_MEMBER_CANADIAN_CITY_DISTRIBUTION)
# plt.show()