In [1]:
"""
2. Link MP's to constituencies and their geometry
"""

"\n2. Link MP's to constituencies and their geometry\n"

In [2]:
"""
Produces a dataframe of the geometry of all Westminster constituencies
"""
import pandas as pd
import geopandas as gpd
# First boundary file (presumably England, Wales and Scotland, going by the
# variable name). Its name column is "NAME", so it is renamed to match the
# lower-case "name" column selected from the Northern Ireland file.
en_wl_sct = gpd.read_file("shape/westminster_const_region.shp").rename(
    columns={"NAME": "name"}
)
ni = gpd.read_file("shape/nireland_aa_2008.shp")

# Stack both sources, keeping only the constituency name and its shape.
# NOTE(review): the two files may use different coordinate reference systems;
# confirm before plotting them together.
uk_geometry = pd.concat(
    [frame[["name", "geometry"]] for frame in (en_wl_sct, ni)]
)

In [3]:
"""
SPARQL query to wikidata endpoint to get all MPs and their constituencies

"""
from SPARQLWrapper import SPARQLWrapper, JSON

const_df = pd.DataFrame()
sparql  = SPARQLWrapper("https://query.wikidata.org/sparql")
sparql.setQuery("""
SELECT ?person ?const ?personLabel ?constLabel
WHERE
{
  ?person p:P39 ?position.
  ?position ps:P39 wd:Q77685926.
  ?position pq:P768 ?const.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
""")

sparql.setReturnFormat(JSON)
result = sparql.query().convert()
# results -> bindings returns a list of dictionaries
result = result["results"]["bindings"]

# removed type from the results
for item in result:
    for key in item:
        item[key] = item[key]["value"]
const_df = pd.DataFrame(result)
const_df.head(3)

Unnamed: 0,const,person,personLabel,constLabel
0,http://www.wikidata.org/entity/Q1032152,http://www.wikidata.org/entity/Q259707,Stuart Andrew,Pudsey
1,http://www.wikidata.org/entity/Q3138501,http://www.wikidata.org/entity/Q261773,Rosie Winterton,Doncaster Central
2,http://www.wikidata.org/entity/Q613294,http://www.wikidata.org/entity/Q263076,Robin Walker,Worcester


In [5]:
# The query can return the same record more than once (for example when a
# person changes parties, e.g. Julian Lewis), so exact duplicate rows are
# collapsed and both label columns are pinned to the object dtype.
const_df = (
    const_df
    .drop_duplicates()
    .astype({"constLabel": "object", "personLabel": "object"})
)

# Different people listed for the same constituency are deliberately kept.
# This can happen when a by-election occurs, and either of them could have
# asked questions.

In [6]:
"""
Makes the names of the constituencies consistent between uk_geometry and district_df_cleaned
"""
replace_dict = {
    "," : "",
    "Co Const": "",
    "Cor Const" : "",
    "Boro Const" : "",
    "Burgh Const" : "",
    "." : "",
    "Weston-Super-Mare" : "Weston-super-Mare",
    "ô" : "o"
}

for key in replace_dict:
    uk_geometry["name"] = uk_geometry["name"].str.replace(key, replace_dict[key])
    const_df["constLabel"] = const_df["constLabel"].str.replace(key, replace_dict[key])

uk_geometry["name"] = uk_geometry["name"].str.rstrip()

uk_geometry = uk_geometry[["name", "geometry"]]

In [7]:

# Attach each constituency's shape to its MP row(s). The inner join keeps only
# rows whose constituency name is present in both tables.
MP_geometry = const_df.merge(
    uk_geometry, how="inner", left_on="constLabel", right_on="name"
)
# Number of distinct constituencies that found a match.
print(MP_geometry["constLabel"].nunique(dropna=False))

650


In [8]:
"""
Check for any constituencies present in one but not the other
"""
for name in uk_geometry["name"].unique():
    if name not in MP_geometry["constLabel"].unique():
        print(name)

for district in const_df["constLabel"].unique():
    if name not in MP_geometry["name"].unique():
        print(name)

In [9]:
# After the merge "name" holds the same values as "constLabel", so it is
# redundant. Rebinding with errors="ignore" (instead of inplace=True) keeps the
# cell idempotent: running it a second time no longer raises KeyError.
MP_geometry = MP_geometry.drop(columns=["name"], errors="ignore")
MP_geometry

Unnamed: 0,const,person,personLabel,constLabel,geometry
0,http://www.wikidata.org/entity/Q1032152,http://www.wikidata.org/entity/Q259707,Stuart Andrew,Pudsey,"POLYGON ((419147.497 440845.799, 419130.100 44..."
1,http://www.wikidata.org/entity/Q3138501,http://www.wikidata.org/entity/Q261773,Rosie Winterton,Doncaster Central,"POLYGON ((455018.897 401469.600, 455071.303 40..."
2,http://www.wikidata.org/entity/Q613294,http://www.wikidata.org/entity/Q263076,Robin Walker,Worcester,"POLYGON ((385822.601 258541.703, 385809.499 25..."
3,http://www.wikidata.org/entity/Q1031940,http://www.wikidata.org/entity/Q263350,Jake Berry,Rossendale and Darwen,"POLYGON ((388662.753 425190.573, 388691.404 42..."
4,http://www.wikidata.org/entity/Q1031751,http://www.wikidata.org/entity/Q263508,Clive Betts,Sheffield South East,"POLYGON ((439407.302 391666.296, 439465.100 39..."
...,...,...,...,...,...
664,http://www.wikidata.org/entity/Q1080518,http://www.wikidata.org/entity/Q76449017,Simon Jupp,East Devon,"POLYGON ((294508.731 96046.540, 294508.510 960..."
665,http://www.wikidata.org/entity/Q1077598,http://www.wikidata.org/entity/Q76450642,Jane Hunt,Loughborough,"POLYGON ((450929.602 315678.597, 450928.201 31..."
666,http://www.wikidata.org/entity/Q1072775,http://www.wikidata.org/entity/Q76450728,Kate Osborne,Jarrow,"POLYGON ((435134.003 566077.102, 435320.399 56..."
667,http://www.wikidata.org/entity/Q1070140,http://www.wikidata.org/entity/Q76736577,Greg Smith,Buckingham,"POLYGON ((460619.899 235588.003, 460625.870 23..."


In [10]:
# Persist the linked MP/constituency table as CSV, leaving out the row index.
# NOTE(review): the geometry column is presumably written as text; confirm it
# can be parsed back by whatever reads this file.
MP_geometry.to_csv(path_or_buf="MP_geometry.csv", index=False)

In [11]:
# Print a summary of the final table: column dtypes and non-null counts.
MP_geometry.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 669 entries, 0 to 668
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype   
---  ------       --------------  -----   
 0   const        669 non-null    object  
 1   person       669 non-null    object  
 2   personLabel  669 non-null    object  
 3   constLabel   669 non-null    object  
 4   geometry     669 non-null    geometry
dtypes: geometry(1), object(4)
memory usage: 26.3+ KB
