In [None]:
import os
from urllib.request import urlopen
import ssl, certifi
import pandas as pd

# Download the three TidyTuesday UN-votes CSVs and cache them under ../data.
DATA_DIR = "../data"
os.makedirs(DATA_DIR, exist_ok=True)

# TLS context that validates server certificates using the CA bundle shipped with certifi.
ctx = ssl.create_default_context(cafile=certifi.where())

BASE_URL = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2021/2021-03-23"
# Without an explicit timeout urlopen can block forever on a stalled connection.
DOWNLOAD_TIMEOUT_S = 30

# Dataset name -> source URL; the name doubles as the local file stem.
urls = {name: f"{BASE_URL}/{name}.csv" for name in ("unvotes", "roll_calls", "issues")}

for name, url in urls.items():
    # Parse the response straight into a DataFrame; the connection is closed
    # as soon as the `with` block exits.
    with urlopen(url, context=ctx, timeout=DOWNLOAD_TIMEOUT_S) as f:
        df = pd.read_csv(f)
    out_path = f"{DATA_DIR}/{name}.csv"
    df.to_csv(out_path, index=False)
    print(f"✅ Saved to {out_path}")

✅ Saved to ../data/unvotes.csv
✅ Saved to ../data/roll_calls.csv
✅ Saved to ../data/issues.csv


In [10]:
# Read the three cached CSVs back in (same order as they were saved).
unvotes, roll_calls, issues = (
    pd.read_csv(f"../data/{stem}.csv")
    for stem in ("unvotes", "roll_calls", "issues")
)

# Show the first five rows of each table, one after another.
print(unvotes.head(), roll_calls.head(), issues.head(), sep="\n")

   rcid             country country_code vote
0     3       United States           US  yes
1     3              Canada           CA   no
2     3                Cuba           CU  yes
3     3               Haiti           HT  yes
4     3  Dominican Republic           DO  yes
   rcid  session  importantvote        date    unres  amend  para  \
0     3        1            0.0  1946-01-01   R/1/66    1.0   0.0   
1     4        1            0.0  1946-01-02   R/1/79    0.0   0.0   
2     5        1            0.0  1946-01-04   R/1/98    0.0   0.0   
3     6        1            0.0  1946-01-04  R/1/107    0.0   0.0   
4     7        1            0.0  1946-01-02  R/1/295    1.0   0.0   

                            short  \
0  AMENDMENTS, RULES OF PROCEDURE   
1      SECURITY COUNCIL ELECTIONS   
2                VOTING PROCEDURE   
3     DECLARATION OF HUMAN RIGHTS   
4      GENERAL ASSEMBLY ELECTIONS   

                                               descr  
0  TO ADOPT A CUBAN AMENDMENT T

## Importing libraries and training a baseline vote classifier

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report

# Reload the cached CSVs so this cell does not depend on variables from earlier cells.
unvotes    = pd.read_csv('../data/unvotes.csv')
roll_calls = pd.read_csv('../data/roll_calls.csv')
issues = pd.read_csv('../data/issues.csv')

# Group countries by region. Only the five countries listed here receive a
# region; every other country maps to NaN and becomes 'Unknown' below.
# NOTE(review): extend this mapping if `region` is meant to be informative.
region_map = {
    'United States': 'Americas', 'France': 'Europe', 'China': 'Asia',
    'Nigeria': 'Africa', 'Australia': 'Oceania'
}

# Attach roll-call metadata and issue labels to each vote. Left joins keep
# every row of `unvotes` even when `rcid` has no match in the other tables.
# NOTE(review): if `issues` holds several rows per rcid, this join repeats the
# vote row once per issue — confirm that duplication is intended.
merged = (
    unvotes
    .merge(roll_calls, on='rcid', how='left')
    .merge(issues, on='rcid', how='left')
    .assign(region=lambda frame: frame['country'].map(region_map))
)

# All three features are treated as categorical; missing values get their own
# 'Unknown' category.
# NOTE(review): if `session` ever contains NaN, fillna would mix strings with
# numbers in that column — verify the column is fully populated.
feature_cols = ['issue', 'session', 'region']
X = merged[feature_cols].fillna('Unknown')
y = merged['vote']

# One-hot encode every feature, dropping the first level of each.
preprocessor = ColumnTransformer([('cat', OneHotEncoder(drop='first'), feature_cols)])
model = Pipeline([
    ('preprocessor', preprocessor),
    # `multi_class` is deprecated since scikit-learn 1.5; the default solver
    # already fits a multinomial model when there are three or more classes,
    # so omitting it keeps the same fit without the FutureWarning.
    ('classifier', LogisticRegression(max_iter=500))
])

# Hold out 20% of the rows for evaluation; fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model.fit(X_train, y_train)

# Per-class precision / recall / F1 on the held-out rows.
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))




              precision    recall  f1-score   support

     abstain       0.42      0.00      0.01     28618
          no       0.47      0.07      0.13     16175
         yes       0.81      1.00      0.89    179874

    accuracy                           0.80    224667
   macro avg       0.56      0.36      0.34    224667
weighted avg       0.73      0.80      0.72    224667

