In [2]:
from fractions import Fraction

In [3]:
import country_converter
import geopandas as gpd
import pandas as pd

## Open Geojson

In [4]:
european_countries_gdf = gpd.read_file("data/europe.geojson")

## Get Socialist data

In [5]:
res = pd.read_html(
    "https://en.wikipedia.org/wiki/Party_of_European_Socialists#Full_members"
)

In [6]:
original_socialist_df = res[1][["State", "MEPs[a]"]]

In [7]:
original_socialist_df = original_socialist_df[
    original_socialist_df["MEPs[a]"] != "Not in EU"
].reset_index()

#### Manually Fix Duplicates

In [8]:
duplicate_df = original_socialist_df[original_socialist_df["State"].duplicated()]
duplicate_df

Unnamed: 0,index,State,MEPs[a]
2,2,Belgium,1 / 13[. 2]
14,14,Hungary,0 / 21
17,17,Italy,0 / 76


In [11]:
original_socialist_df[original_socialist_df["State"] == "Belgium"]

Unnamed: 0,index,State,MEPs[a]
1,1,Belgium,2 / 8[. 1]
2,2,Belgium,1 / 13[. 2]


In [12]:
original_socialist_df[original_socialist_df["State"] == "Hungary"]

Unnamed: 0,index,State,MEPs[a]
13,13,Hungary,2 / 21
14,14,Hungary,0 / 21


In [13]:
original_socialist_df[original_socialist_df["State"] == "Italy"]

Unnamed: 0,index,State,MEPs[a]
16,16,Italy,21 / 76
17,17,Italy,0 / 76


In [14]:
duplicates = [
    {"State": "Belgium", "MEPs[a]": "3/21"},
    {"State": "Hungary", "MEPs[a]": "2/21"},
    {"State": "Italy", "MEPs[a]": "21/76"},
]
fixed_duplicates_df = pd.DataFrame(duplicates)

#### Back to Socialist DF

In [15]:
socialist_df = original_socialist_df[
    ~original_socialist_df["State"].isin(duplicate_df["State"])
]

In [16]:
socialist_df = pd.concat([socialist_df, fixed_duplicates_df])[
    ["State", "MEPs[a]"]
].reset_index()

In [18]:
percent_of_socialists = []
number_of_socialists = []
for fraction in list(socialist_df["MEPs[a]"]):
    str_fraction = fraction.split("[")[0].replace(" ", "")
    number_of_socialists.append(int(str_fraction.split("/")[0]))

    percent_of_socialist = float(Fraction(str_fraction))
    percent_of_socialists.append(percent_of_socialist)

In [19]:
fractions = [
    round(float(Fraction(fraction.split("[")[0].replace(" ", ""))), 3)
    for fraction in list(socialist_df["MEPs[a]"])
]
socialist_df["percent"] = fractions
socialist_df["number_of_socialists"] = number_of_socialists

In [20]:
socialist_df["ISO2"] = socialist_df.apply(
    lambda row: country_converter.convert(names=row["State"], to="ISO2"), axis=1
)

## Merge data and export

In [21]:
socialist_gdf = european_countries_gdf.merge(socialist_df, on="ISO2")[
    ["ISO2", "geometry", "percent", "number_of_socialists"]
]

In [22]:
socialist_gdf = socialist_gdf.to_crs(3035)

In [24]:
socialist_gdf.to_file("data/socialist_percent.gpkg")

In [25]:
socialist_gdf["number_of_socialists"].sum()

np.int64(135)