In [5]:
import requests
import pandas as pd
from io import StringIO

url = "https://digitallibrary.un.org/record/4060887/files/2025_7_23_ga_voting.csv?ln=en"

# Without a timeout, requests waits indefinitely if the server never answers,
# which would freeze the notebook on "Run All".
response = requests.get(url, timeout=60)
# Fail loudly on 4xx/5xx instead of silently keeping an error page in `response`.
response.raise_for_status()


In [8]:
import os
import pandas as pd

def _pivot_votes(df, index_columns):
    """Pivot long-format votes into one row per resolution and one column per member-state code."""
    wide = df.pivot(index=index_columns, columns='ms_code', values='ms_vote').reset_index()
    # After reset_index the columns axis is still named "ms_code"; clear it.
    wide.columns.name = None
    return wide


# TODO: add vetoes, vetoes are not included atm
def fetch_UN_data(dir_path=None):
    """
    Fetches and processes United Nations General Assembly and Security Council voting data.

    Each dataset is read from ``dir_path`` when a local copy exists there; any
    dataset that is missing locally is downloaded from the UN Digital Library.
    Both datasets are then pivoted so that every member-state code becomes a column.

    Parameters:
    -----------
    dir_path : str, optional
        Path to directory where data should be read from or saved to.
        If None, data will be fetched from the UN Digital Library and not saved locally.
        The directory is created if it does not exist yet.

    Returns:
    --------
    tuple
        A tuple containing four DataFrames:
        - df_ga: Original GA voting data
        - df_ga_transformed: Pivoted GA voting data with countries as columns
        - df_sc: Original SC voting data
        - df_sc_transformed: Pivoted SC voting data with countries as columns

        If a download fails, an error is printed and
        ``(None, None, None, None)`` is returned instead.

    Notes:
    ------
    - Currently, the Security Council data does not include veto information explicitly.
    - A failure to write the local copy is reported as a warning; the
      downloaded data is still returned.
    """
    # dataset key -> (local file name, download URL)
    sources = {
        "ga": (
            "2025_7_23_ga_voting.csv",
            "https://digitallibrary.un.org/record/4060887/files/2025_7_23_ga_voting.csv?ln=en",
        ),
        "sc": (
            "2025_7_21_sc_voting.csv",
            "https://digitallibrary.un.org/record/4055387/files/2025_7_21_sc_voting.csv?ln=en",
        ),
    }
    frames = {}

    if dir_path:
        for key, (filename, _) in sources.items():
            try:
                # low_memory=False parses each column in one pass, so a column
                # never ends up with a mix of types (the cause of DtypeWarning).
                frames[key] = pd.read_csv(os.path.join(dir_path, filename), low_memory=False)
            except FileNotFoundError:
                # No local copy of this dataset; it is downloaded below.
                continue
        if len(frames) < len(sources):
            print("Not all data found locally. Fetching from UN Digital Library...")

    # Only download what is not already available locally.
    missing = [key for key in sources if key not in frames]
    if missing:
        try:
            for key in missing:
                frames[key] = pd.read_csv(sources[key][1], low_memory=False)
        except Exception as e:
            print("Error fetching data from UN Digital Library. The dataset might has been updated. Check the date in the URL.")
            print(f"Error: {e}")
            return None, None, None, None

        # Save data locally if dir_path is provided. This is kept outside the
        # download try-block so a disk error is neither reported as a fetch
        # error nor allowed to discard data that was downloaded successfully.
        if dir_path:
            try:
                if not os.path.exists(dir_path):
                    os.makedirs(dir_path, exist_ok=True)
                    print(f"Created directory: {dir_path}")
                for key in missing:
                    frames[key].to_csv(os.path.join(dir_path, sources[key][0]), index=False)
            except OSError as e:
                print(f"Warning: data was fetched but could not be saved to {dir_path}: {e}")

    df_ga = frames["ga"]
    df_sc = frames["sc"]

    # Transform ga data
    ga_index_columns = ["undl_id", "date", "session", "resolution", "draft", "committee_report", "meeting", "title", "agenda_title", "subjects", "total_yes", "total_no", "total_abstentions", "total_non_voting", "total_ms", "undl_link"]
    df_ga_transformed = _pivot_votes(df_ga, ga_index_columns)

    # Transform sc data
    sc_index_columns = ["undl_id", "date", "resolution", "draft", "meeting", "description", "agenda", "subjects", "modality", "total_yes", "total_no", "total_abstentions", "total_non_voting", "total_ms", "undl_link"]
    df_sc_transformed = _pivot_votes(df_sc, sc_index_columns)

    return df_ga, df_ga_transformed, df_sc, df_sc_transformed


In [9]:
# Location of the local data cache. Set the UN_DATA_DIR environment variable to
# point at an existing cache; otherwise a "data" folder relative to the working
# directory is used, so the notebook also runs on other machines.
DATA_DIR = os.environ.get("UN_DATA_DIR", "data")
df_ga, df_ga_transformed, df_sc, df_sc_transformed = fetch_UN_data(dir_path=DATA_DIR)

  df_ga = pd.read_csv(f"{dir_path}/2025_7_23_ga_voting.csv")


In [10]:
# Column names of the raw GA voting table, as a plain Python list
list(df_ga.columns)

['undl_id',
 'ms_code',
 'ms_name',
 'ms_vote',
 'date',
 'session',
 'resolution',
 'draft',
 'committee_report',
 'meeting',
 'title',
 'agenda_title',
 'subjects',
 'vote_note',
 'total_yes',
 'total_no',
 'total_abstentions',
 'total_non_voting',
 'total_ms',
 'undl_link']

# Display unique country codes and names

We want to check whether any country code maps to more than one country name.

In [11]:
# Number of distinct country codes (dropna=False counts a missing value like .unique() does)
df_ga['ms_code'].nunique(dropna=False)

202

In [12]:
# Number of distinct country names (dropna=False counts a missing value like .unique() does)
df_ga['ms_name'].nunique(dropna=False)

243

In [13]:
# For every country code, gather the distinct names it appears under
ms_code_to_names = (
    df_ga
    .groupby('ms_code')['ms_name']
    .unique()
    .reset_index()
)

# Flag the codes that are associated with more than one name
multiple_names_mask = ms_code_to_names['ms_name'].map(len).gt(1)
multiple_names_count = multiple_names_mask.sum()

print(f"Total unique country codes: {len(ms_code_to_names)}")
print(f"Country codes with multiple names: {multiple_names_count}")

# List each ambiguous code together with all of its names
if multiple_names_count > 0:
    print("\nCountry codes with multiple associated names:")
    ambiguous = ms_code_to_names[multiple_names_mask]
    for code, names in zip(ambiguous['ms_code'], ambiguous['ms_name']):
        print(f"{code}: {', '.join(names)}")

Total unique country codes: 202
Country codes with multiple names: 33

Country codes with multiple associated names:
BEN: BENIN, DAHOMEY
BFA: BURKINA FASO, UPPER VOLTA
BLR: BELARUS, BYELORUSSIAN SSR
BOL: BOLIVIA, BOLIVIA (PLURINATIONAL STATE OF)
CAF: CENTRAL AFRICAN REPUBLIC, CENTRAL AFRICAN EMPIRE
CIV: IVORY COAST, CÔTE D'IVOIRE, COTE D'IVOIRE
CMR: CAMEROON, UNITED REPUBLIC OF CAMEROON
COD: ZAIRE, DEMOCRATIC REPUBLIC OF THE CONGO, CONGO (LEOPOLDVILLE), CONGO (DEMOCRATIC REPUBLIC OF)
COG: CONGO, CONGO (BRAZZAVILLE)
CPV: CAPE VERDE, CABO VERDE
CZE: CZECH REPUBLIC, CZECHIA
EGY: EGYPT, UNITED ARAB REPUBLIC
IRN: IRAN (ISLAMIC REPUBLIC OF), IRAN
KHM: CAMBODIA, DEMOCRATIC KAMPUCHEA, KHMER REPUBLIC
KNA: SAINT KITTS AND NEVIS, SAINT CHRISTOPHER AND NEVIS
LAO: LAO PEOPLE'S DEMOCRATIC REPUBLIC, LAO PEOPLE's DEMOCRATIC REPUBLIC, LAOS
LBY: LIBYAN ARAB JAMAHIRIYA, LIBYA, LIBYAN ARAB REPUBLIC
LKA: SRI LANKA, CEYLON
MDV: MALDIVES, MALDIVE ISLANDS
MKD: THE FORMER YUGOSLAV REPUBLIC OF MACEDONIA, NORTH 

In [14]:
# One record per country code; codes with several names get them joined by ' / '
country_mapping = [
    {'Code': code, 'Name': names[0] if len(names) == 1 else ' / '.join(names)}
    for code, names in zip(ms_code_to_names['ms_code'], ms_code_to_names['ms_name'])
]

# Build the lookup table, ordered alphabetically by code
country_df = pd.DataFrame(country_mapping).sort_values('Code')

# Print the full code -> name listing
print(f"Total countries: {len(country_df)}\n")
for code, name in zip(country_df['Code'], country_df['Name']):
    print(f"{code}: {name}")

Total countries: 202

AFG: AFGHANISTAN
AGO: ANGOLA
ALB: ALBANIA
AND: ANDORRA
ARE: UNITED ARAB EMIRATES
ARG: ARGENTINA
ARM: ARMENIA
ATG: ANTIGUA AND BARBUDA
AUS: AUSTRALIA
AUT: AUSTRIA
AZE: AZERBAIJAN
BDI: BURUNDI
BEL: BELGIUM
BEN: BENIN / DAHOMEY
BFA: BURKINA FASO / UPPER VOLTA
BGD: BANGLADESH
BGR: BULGARIA
BHR: BAHRAIN
BHS: BAHAMAS
BIH: BOSNIA AND HERZEGOVINA
BLR: BELARUS / BYELORUSSIAN SSR
BLZ: BELIZE
BOL: BOLIVIA / BOLIVIA (PLURINATIONAL STATE OF)
BRA: BRAZIL
BRB: BARBADOS
BRN: BRUNEI DARUSSALAM
BTN: BHUTAN
BWA: BOTSWANA
CAF: CENTRAL AFRICAN REPUBLIC / CENTRAL AFRICAN EMPIRE
CAN: CANADA
CHE: SWITZERLAND
CHL: CHILE
CHN: CHINA
CIV: IVORY COAST / CÔTE D'IVOIRE / COTE D'IVOIRE
CMR: CAMEROON / UNITED REPUBLIC OF CAMEROON
COD: ZAIRE / DEMOCRATIC REPUBLIC OF THE CONGO / CONGO (LEOPOLDVILLE) / CONGO (DEMOCRATIC REPUBLIC OF)
COG: CONGO / CONGO (BRAZZAVILLE)
COL: COLOMBIA
COM: COMOROS
CPV: CAPE VERDE / CABO VERDE
CRI: COSTA RICA
CSK: CZECHOSLOVAKIA
CUB: CUBA
CYP: CYPRUS
CZE: CZECH REPUBLIC / 

33 country codes are associated with more than one country name.

However, not every case is handled by sharing a code: one example I noticed is that Yugoslavia (YUG) and the Russian Federation (RUS) have their own separate country codes!