In [2]:
# https://rebeccabilbro.github.io/sparql-from-python/
from SPARQLWrapper import SPARQLWrapper, JSON
import pandas as pd

question_df = pd.DataFrame()
sparql  = SPARQLWrapper("https://api.parliament.uk/sparql/")
sparql.setQuery("""
SELECT *
WHERE {
?question <https://id.parliament.uk/schema/writtenQuestionIndexingAndSearchUin> ?qnum .
?person <https://id.parliament.uk/schema/askingPersonHasQuestion> ?question .
?person <https://id.parliament.uk/schema/wikidataThingHasEquivalentWikidataResource> ?wikidataperson.
?question <https://id.parliament.uk/schema/questionText> ?text .
?question <https://id.parliament.uk/schema/questionAskedAt> ?date .
FILTER (?date >= "2023-01-01+00:00"^^xsd:dateTime && ?date < "2023-10-01+00:00"^^xsd:dateTime)
FILTER regex(?qnum, "^(?!HL)") 
}
""")

sparql.setReturnFormat(JSON)
result = sparql.query().convert()
# results -> bindings returns a list of dictionaries
result = result["results"]["bindings"]

# removed type from the results
for item in result:
    for key in item:
        item[key] = item[key]["value"]


In [136]:
question_df = pd.DataFrame(result)

question_df["text"] = question_df["text"].str.replace("<p>", "", )
question_df["text"] = question_df["text"].str.replace("</p>", "", )

question_df

KeyError: 'text'

In [301]:

district_df = pd.DataFrame()
sparql  = SPARQLWrapper("https://query.wikidata.org/sparql")
sparql.setQuery("""
SELECT ?person ?district ?personLabel ?districtLabel
WHERE
{
  ?person p:P39 ?position.
  ?position ps:P39 wd:Q77685926.
  ?position pq:P768 ?district
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
""")

sparql.setReturnFormat(JSON)
result = sparql.query().convert()
# results -> bindings returns a list of dictionaries
result = result["results"]["bindings"]

# removed type from the results
for item in result:
    for key in item:
        item[key] = item[key]["value"]
district_df = pd.DataFrame(result)
district_df

Unnamed: 0,district,person,personLabel,districtLabel
0,http://www.wikidata.org/entity/Q873637,http://www.wikidata.org/entity/Q24061230,Florence Eshalomi,Vauxhall
1,http://www.wikidata.org/entity/Q3335605,http://www.wikidata.org/entity/Q28834855,Trudy Harrison,Copeland
2,http://www.wikidata.org/entity/Q1072710,http://www.wikidata.org/entity/Q30164739,Mike Hill,Hartlepool
3,http://www.wikidata.org/entity/Q3301747,http://www.wikidata.org/entity/Q30224361,David Linden,Glasgow East
4,http://www.wikidata.org/entity/Q1053429,http://www.wikidata.org/entity/Q30228118,Simon Clarke,Middlesbrough South and East Cleveland
...,...,...,...,...
697,http://www.wikidata.org/entity/Q1031884,http://www.wikidata.org/entity/Q122451364,Michael Shanks,Rutherglen and Hamilton West
698,http://www.wikidata.org/entity/Q3337694,http://www.wikidata.org/entity/Q19871819,Lisa Cameron,"East Kilbride, Strathaven and Lesmahagow"
699,http://www.wikidata.org/entity/Q988057,http://www.wikidata.org/entity/Q123113780,Sarah Edwards,Tamworth
700,http://www.wikidata.org/entity/Q1072632,http://www.wikidata.org/entity/Q122848847,Alistair Luke Strathern,Mid Bedfordshire


In [302]:
# 650 seats in the house of commons, yet 669 unique people returned
print(district_df["person"].duplicated().sum())
# return a df of people who arew in duplicate_people and not in duplicate_districts

district_df[district_df["person"].duplicated()][["person"]].head()

33


Unnamed: 0,person
45,http://www.wikidata.org/entity/Q291169
51,http://www.wikidata.org/entity/Q303299
62,http://www.wikidata.org/entity/Q575266
73,http://www.wikidata.org/entity/Q983174
77,http://www.wikidata.org/entity/Q5129278


In [303]:
# Of the people who appear more than once, do they have different districts in each occurence?
# This might cause problem if someone has multiple districts associated with them
district_df[district_df["person"].duplicated(keep=False)][~district_df["district"].duplicated(keep=False)]

  district_df[district_df["person"].duplicated(keep=False)][~district_df["district"].duplicated(keep=False)]


Unnamed: 0,district,person,personLabel,districtLabel


In [304]:
# all duplicated people have the same district in both. So we can remove duplicates of people
# duplicate districts are caused by multiple people being elected in the same district

district_df_cleaned = district_df.drop_duplicates()

# set the labels to object type
district_df_cleaned = district_df_cleaned.astype({"districtLabel": "object", "personLabel": "object"})

# there a some duplicate districts still because of members being suspended and then a by-election being called


# only keep the people who are in the question_df
# district_df_cleaned = district_df_cleaned[district_df_cleaned["person"].isin(question_df["wikidataperson"])]
district_df_cleaned

Unnamed: 0,district,person,personLabel,districtLabel
0,http://www.wikidata.org/entity/Q873637,http://www.wikidata.org/entity/Q24061230,Florence Eshalomi,Vauxhall
1,http://www.wikidata.org/entity/Q3335605,http://www.wikidata.org/entity/Q28834855,Trudy Harrison,Copeland
2,http://www.wikidata.org/entity/Q1072710,http://www.wikidata.org/entity/Q30164739,Mike Hill,Hartlepool
3,http://www.wikidata.org/entity/Q3301747,http://www.wikidata.org/entity/Q30224361,David Linden,Glasgow East
4,http://www.wikidata.org/entity/Q1053429,http://www.wikidata.org/entity/Q30228118,Simon Clarke,Middlesbrough South and East Cleveland
...,...,...,...,...
695,http://www.wikidata.org/entity/Q751466,http://www.wikidata.org/entity/Q120780803,Steve Tuckwell,Uxbridge and South Ruislip
696,http://www.wikidata.org/entity/Q1031788,http://www.wikidata.org/entity/Q120780806,Keir Mather,Selby and Ainsty
697,http://www.wikidata.org/entity/Q1031884,http://www.wikidata.org/entity/Q122451364,Michael Shanks,Rutherglen and Hamilton West
699,http://www.wikidata.org/entity/Q988057,http://www.wikidata.org/entity/Q123113780,Sarah Edwards,Tamworth


In [8]:
# Check if there are any people who have asked questions but are not in the district_df
question_df[~question_df["wikidataperson"].isin(district_df_cleaned["person"])]

Unnamed: 0,question,qnum,person,wikidataperson,text,date


In [9]:
import random
import spacy  # version 3.0.6'

# initialize language model
nlp = spacy.load("en_core_web_md")

# add pipeline (declared through entry_points in setup.py)
nlp.add_pipe("entityLinker", last=True)

# pick random number
x = random.randint(0, len(question_df))

print(question_df["text"].iloc[x])

doc = nlp(question_df["text"].iloc[x])
doc._.linkedEntities.print_super_entities()
# for sent in doc.sents:
#     sent._.linkedEntities.pretty_print()

  from .autonotebook import tqdm as notebook_tqdm


To ask the Secretary of State for Health and Social Care, with reference to the Mental Health Bill, whether he plans to include a statutory duty to provide early intervention strategies to detect and address mental health issues for children and young people within primary and secondary schools; and if he will make a statement.
medical procedure (2) : home care,treatment
public office (1) : United States Secretary of State
secretary of state (1) : United States Secretary of State
foreign minister (1) : United States Secretary of State
software (1) : State
musical group (1) : Health
assist (1) : home care
type of medical procedure (1) : home care
hospitalization (1) : home care
relation (1) : reference


In [10]:
import requests
token = "5fcf52fb-79b2-4718-b7ee-62957df7d7e0-843339462"

def pick_random_question():
    x = random.randint(0, len(question_df))
    return question_df["text"].iloc[x]


def tag_me(text=None):
    if not text:
        text = pick_random_question()
        print(text)


    address = f"https://tagme.d4science.org/tagme/tag?lang=en&gcube-token={token}&include_categories=false&include_abstract=false&text={text}"
    response = requests.get(address)

    annotations = response.json()["annotations"]
    new_annotations = []

    for spot in annotations:
        # remove if rho is less than 0.1
        if spot["rho"] >= 0.1:
            new_annotations.append(spot)

    return new_annotations


def WAT(text=None):
    if not text:
        text = pick_random_question()
        #print(text)
       
    address = f"https://wat.d4science.org/wat/tag/tag?lang=en&gcube-token={token}&include_categories=true&text={text}"
    response = requests.get(address)
    annotations = response.json()["annotations"]
    new_annotations = []

    for spot in annotations:
        # remove if rho is less than 0.1
        if spot["rho"] >= 0.1:
            new_annotations.append(spot)
    return new_annotations


def REL(text=None):
    if not text:
        text = pick_random_question()
        print(text)
    API_URL = "https://rel.cs.ru.nl/api"

    # Example ED.
    ed_result = requests.post(API_URL, json={
        "text": text,
    })
    assert ed_result.status_code == 200
    
    return ed_result.json()

In [11]:
import time
#https://stackoverflow.com/questions/40098656/how-to-get-coordinates-from-a-wikipedia-page-through-api
def get_wikipedia_location(wikipedia_title):
    response = requests.get(f"https://en.wikipedia.org/w/api.php?action=query&prop=coordinates&format=json&titles={wikipedia_title}").json()
    try :
        # makes iterator over pages and takes first element
        page = next(iter(response['query']['pages'].values()))
        return page['coordinates'][0]["lat"], page['coordinates'][0]["lon"]
    except KeyError:
        return None

def convert_to_BNG(location):
    result = requests.get(f"http://webapps.bgs.ac.uk/data/webservices/CoordConvert_LL_BNG.cfc?method=LatLongToBNG&lat={location[0]}&lon={location[1]}").json()
    return result["EASTING"], result["NORTHING"]


# iterate through dataframe rows
# https://stackoverflow.com/questions/37024807/how-to-get-wikidata-id-for-an-wikipedia-article-by-api
def get_wikidata_from_wikipedia(title):
    response = requests.get(f"https://en.wikipedia.org/w/api.php?action=query&prop=pageprops&ppprop=wikibase_item&redirects=1&format=json&titles={title}").json()
    try :
        # makes iterator over pages and takes first element
        page = next(iter(response['query']['pages'].values()))
        return page['pageprops']['wikibase_item']
    except KeyError:
        assert False, "No wikidata found"

def get_wikidata_location(wikidata_id):
    sparql  = SPARQLWrapper("https://query.wikidata.org/sparql")
    sparql.setQuery(f"""
    SELECT ?position
    WHERE
    {{
    wd:{wikidata_id} wdt:P625 ?position
    }}
    """)

    sparql.setReturnFormat(JSON)
    result = sparql.query().convert()


    # results -> bindings returns a list of dictionaries
    result = result["results"]["bindings"]
    # if there is no location return None
    if result == []:
        return None
    result = result[0]["position"]["value"].replace("Point(", "").replace(")", "").split(" ")
    # return result as latitude, longitude
    return result[1], result[0]


In [100]:
#print(convert_to_BNG(get_wikidata_location("Q174193")))
print(get_wikipedia_location("United_Kingdom"))
len(question_df)
question_df[0:1]
len(question_df)
question_df[20000:26865]

(55, -3)


Unnamed: 0,question,qnum,person,wikidataperson,text,date
20000,https://id.parliament.uk/EjMYNuas,191051,https://id.parliament.uk/3H8gHMh9,http://www.wikidata.org/entity/Q304251,To ask the Secretary of State for the Home Dep...,2023-06-26+01:00
20001,https://id.parliament.uk/VXalFw1L,191006,https://id.parliament.uk/x591fWv4,http://www.wikidata.org/entity/Q480148,To ask the Secretary of State for Health and S...,2023-06-26+01:00
20002,https://id.parliament.uk/Ic18L2Ro,191041,https://id.parliament.uk/tBTPfLzI,http://www.wikidata.org/entity/Q689627,"To ask the Secretary of State for Foreign, Com...",2023-06-26+01:00
20003,https://id.parliament.uk/3e9rlV3l,191014,https://id.parliament.uk/9VjJ9Mfg,http://www.wikidata.org/entity/Q615077,"To ask the Secretary of State for Defence, wha...",2023-06-26+01:00
20004,https://id.parliament.uk/tdaKfqoz,191026,https://id.parliament.uk/nxNQZEuc,http://www.wikidata.org/entity/Q266793,To ask the Secretary of State for the Home Dep...,2023-06-26+01:00
...,...,...,...,...,...,...
26860,https://id.parliament.uk/pkosM2MK,906563,https://id.parliament.uk/wSRcmnqE,http://www.wikidata.org/entity/Q20127903,What steps his Department is taking to reduce ...,2023-09-19+01:00
26861,https://id.parliament.uk/JmAZSx2y,906567,https://id.parliament.uk/Vs3bGLNz,http://www.wikidata.org/entity/Q689287,If he will take steps to expedite the approval...,2023-09-19+01:00
26862,https://id.parliament.uk/dPiUF1wi,906604,https://id.parliament.uk/Fx1EcmX5,http://www.wikidata.org/entity/Q304027,What discussions he has had with the Welsh Gov...,2023-09-19+01:00
26863,https://id.parliament.uk/dhVF7qpN,906500,https://id.parliament.uk/iqJbTPtY,http://www.wikidata.org/entity/Q1681420,"To ask the Secretary of State for Environment,...",2023-09-19+01:00


In [101]:
# list of entites that are larger than a parliamentary constituency
start = 20_000
end = 26_865
banned_entities = [ "United_Kingdom", "England", "Wales", "Northern_Ireland", "Scotland"]
question_locations = []

for index, row in question_df[start:end].iterrows():
    for entity in WAT(row["text"]):
        while True:
            try:
                if entity["title"] in banned_entities:
                    break

                location = get_wikipedia_location(entity["title"])
                # sleep for 0.1 seconds to avoid rate limiting
                
                if location is not None:
                    location = convert_to_BNG(location)

                    # origin is in the isles of scilly, and spans from 0 to 700,000 easting and 0 to 1,300,000 northing
                    if location[0] >= 0 and location[0] <= 700_000 and location[1] >= 0 and location[1] <= 1_300_000:
                        print(entity["title"])
                        question_locations.append({"question": row["question"], "entity": entity["title"], "easting": location[0], "northing": location[1], "wikipedia": f"https://en.wikipedia.org/wiki/{entity['title']}"})
                break
            except KeyError:
                print("time out")
                time.sleep(3)

question_locations_df = pd.DataFrame(question_locations)
question_locations_df.to_csv(f"question_entities/question_locations_{end}.csv")
question_locations

Cambridgeshire
Cabinet_Office
11_Downing_Street
Cabinet_Office
Hove
Cabinet_Office
National_Audit_Office_(United_Kingdom)
Glasgow
Northwood_Headquarters
Department_for_Transport
Gloucestershire
British_Transport_Police
Department_for_Transport
British_Transport_Police
British_Transport_Police
HM_Treasury
Cabinet_Office
Cabinet_Office
North_East_England
Department_for_Transport
Department_for_Transport
Department_for_Transport
Basildon
Billericay
Department_for_Transport
British_Transport_Police
Department_for_Transport
Department_for_Transport
Cabinet_Office
Department_for_Transport
British_Transport_Police
Department_for_Transport
Cabinet_Office
Department_for_Transport
Falmouth_Docks
HM_Treasury
Cabinet_Office
Department_for_Transport
Department_for_Transport
Cabinet_Office
North_East_England
Department_for_Transport
Cabinet_Office
Cabinet_Office
Office_for_National_Statistics
North_East_England
Cabinet_Office
St_Helens,_Merseyside
Falmouth_Docks
Cabinet_Office
Cabinet_Office
Kingsto

[{'question': 'https://id.parliament.uk/32D4KCWL',
  'entity': 'Cambridgeshire',
  'easting': 536372.951668183,
  'northing': 272465.472673855,
  'wikipedia': 'https://en.wikipedia.org/wiki/Cambridgeshire'},
 {'question': 'https://id.parliament.uk/rR6fI8rb',
  'entity': 'Cabinet_Office',
  'easting': 530118.174894204,
  'northing': 179960.770401966,
  'wikipedia': 'https://en.wikipedia.org/wiki/Cabinet_Office'},
 {'question': 'https://id.parliament.uk/rR6fI8rb',
  'entity': '11_Downing_Street',
  'easting': 530051.23676842,
  'northing': 179935.120979539,
  'wikipedia': 'https://en.wikipedia.org/wiki/11_Downing_Street'},
 {'question': 'https://id.parliament.uk/fEYgRIu8',
  'entity': 'Cabinet_Office',
  'easting': 530118.174894204,
  'northing': 179960.770401966,
  'wikipedia': 'https://en.wikipedia.org/wiki/Cabinet_Office'},
 {'question': 'https://id.parliament.uk/wkMoaqbq',
  'entity': 'Hove',
  'easting': 528552.041993163,
  'northing': 105548.472750481,
  'wikipedia': 'https://en.wi

In [115]:
# iterate through the csv files in question_entities and combine them into one dataframe, question_locations_df
import os
question_locations_df = pd.DataFrame()
for file in os.listdir("question_entities"):
    if file.endswith(".csv"):
        print(file)
        new_questions = pd.read_csv(f"question_entities/{file}")
        # reset the index

        new_questions = new_questions.drop(columns=["Unnamed: 0"])
        question_locations_df = pd.concat([question_locations_df, new_questions], ignore_index=True)
question_locations_df

question_locations_1000.csv
question_locations_2000.csv
question_locations_20000.csv
question_locations_26865.csv


Unnamed: 0,question,entity,easting,northing,wikipedia
0,https://id.parliament.uk/dmnAotxP,Office_for_National_Statistics,328820.179860,185811.595715,https://en.wikipedia.org/wiki/Office_for_Natio...
1,https://id.parliament.uk/wne3Q3kQ,West_Midlands_(region),382689.369764,286801.459728,https://en.wikipedia.org/wiki/West_Midlands_(r...
2,https://id.parliament.uk/Q0uNVDhz,Slough,498082.779241,179773.798368,https://en.wikipedia.org/wiki/Slough
3,https://id.parliament.uk/5utw5sA0,Cabinet_Office,530118.174894,179960.770402,https://en.wikipedia.org/wiki/Cabinet_Office
4,https://id.parliament.uk/oHeIeP9G,River_Tees,455114.463816,528466.390692,https://en.wikipedia.org/wiki/River_Tees
...,...,...,...,...,...
4423,https://id.parliament.uk/51DqbupU,HM_Prison_Wandsworth,526728.635699,173925.158213,https://en.wikipedia.org/wiki/HM_Prison_Wandsw...
4424,https://id.parliament.uk/c7mhUeV3,Rathlin_Island,133639.823902,608129.542743,https://en.wikipedia.org/wiki/Rathlin_Island
4425,https://id.parliament.uk/Py4BvdeG,Cabinet_Office,530118.174894,179960.770402,https://en.wikipedia.org/wiki/Cabinet_Office
4426,https://id.parliament.uk/Py4BvdeG,Downing_Street_mortar_attack,530021.784566,179958.304242,https://en.wikipedia.org/wiki/Downing_Street_m...


In [163]:
import geopandas as gpd

# Read the shapefile into a GeoPandas dataframe
enw = gpd.read_file("shape/ew_wpc_2022.shp")

# https://www.bcomm-scotland.independent.gov.uk/?q=reviews/2023-review-uk-parliament-constituencies
sct = gpd.read_file("shape/UK_Parliament_4th_constituencies.shp")
# convert "NAME" column name to "name"
sct = sct.rename(columns={"Name": "name"})


ni = gpd.read_file("shape/nireland_aa_2008.shp")



In [316]:
en_wl_sct = gpd.read_file("shape/westminster_const_region.shp")
ni = gpd.read_file("shape/nireland_aa_2008.shp")
en_wl_sct = en_wl_sct.rename(columns={"NAME": "name"})
uk_geometry = pd.concat([en_wl_sct[["name", "geometry"]], ni[["name", "geometry"]]])

In [332]:
replace_dict = {
    ", ": " ",
    "Co Const": "",
    "Boro Const": "",
    "Burgh Const": "",
    ".": "",
    "ô": "o"
}

uk_geometry["name"] = uk_geometry["name"].replace(replace_dict, regex=True)
uk_geometry["name"] = uk_geometry["name"].str.rstrip()

uk_geometry[[uk_geometry["name"] == "Weston-Super-Mare"]] = "Weston-super-Mare"
uk_geometry = uk_geometry[["name", "geometry"]]




district_df_cleaned["districtLabel"] = district_df_cleaned["districtLabel"].replace(replace_dict, regex=True)


In [338]:
# remove duplicates of the same district
MP_districts = pd.merge(district_df_cleaned, uk_geometry, left_on="districtLabel", right_on="name")
MP_districts

Unnamed: 0,district,person,personLabel,districtLabel,name,geometry
0,http://www.wikidata.org/entity/Q873637,http://www.wikidata.org/entity/Q24061230,Florence Eshalomi,,,"POLYGON ((277338.188 183355.648, 277331.315 18..."
1,http://www.wikidata.org/entity/Q873637,http://www.wikidata.org/entity/Q24061230,Florence Eshalomi,,,"POLYGON ((287117.800 344607.497, 287115.797 34..."
2,http://www.wikidata.org/entity/Q873637,http://www.wikidata.org/entity/Q24061230,Florence Eshalomi,,,"POLYGON ((384027.404 806411.397, 384415.400 80..."
3,http://www.wikidata.org/entity/Q873637,http://www.wikidata.org/entity/Q24061230,Florence Eshalomi,,,"POLYGON ((281907.697 673240.396, 281960.201 67..."
4,http://www.wikidata.org/entity/Q873637,http://www.wikidata.org/entity/Q24061230,Florence Eshalomi,,,"POLYGON ((483364.601 160961.804, 483370.702 16..."
...,...,...,...,...,...,...
645,http://www.wikidata.org/entity/Q873637,http://www.wikidata.org/entity/Q24061230,Florence Eshalomi,,,"POLYGON ((133935.764 554139.594, 133948.527 55..."
646,http://www.wikidata.org/entity/Q873637,http://www.wikidata.org/entity/Q24061230,Florence Eshalomi,,,"POLYGON ((159098.489 507672.926, 159101.141 50..."
647,http://www.wikidata.org/entity/Q873637,http://www.wikidata.org/entity/Q24061230,Florence Eshalomi,,,"POLYGON ((166192.764 532012.369, 166196.879 53..."
648,http://www.wikidata.org/entity/Q873637,http://www.wikidata.org/entity/Q24061230,Florence Eshalomi,,,"POLYGON ((123712.628 523719.851, 123731.107 52..."


In [220]:
#MP_districts = pd.merge(district_df_cleaned, uk_geometry, left_on="districtLabel", right_on="name", how="left")
#MP_districts = MP_districts.dropna(subset=["geometry"])
# MP_districts["districtLabel"].duplicated().sum()
# MP_districts.columns

# MP_districts = MP_districts[["personLabel", "person", "districtLabel", "district", "geometry"]]
# len(MP_districts)

# show values containing NA

#len(MP_districts)
#MP_districts


# district_df_cleaned[district_df_cleaned["personLabel"] == "Steven Bonnar"]
#uk_geometry[uk_geometry["name"] == "Coatbridge Chryston and Bellshil"]
#MP_districts
#district_df_cleaned[district_df_cleaned["districtLabel"] == "Aberconwy"]
MP_districts[MP_districts.isna().any(axis=1)]
    # glasgow_districts = uk_geometry[uk_geometry["name"].str.contains("glasgow", case=False)]
    # glasgow_districts


Unnamed: 0,district,person,personLabel,districtLabel,name,geometry
0,http://www.wikidata.org/entity/Q1032152,http://www.wikidata.org/entity/Q259707,Stuart Andrew,Pudsey,,
1,http://www.wikidata.org/entity/Q3138501,http://www.wikidata.org/entity/Q261773,Rosie Winterton,Doncaster Central,,
2,http://www.wikidata.org/entity/Q613294,http://www.wikidata.org/entity/Q263076,Robin Walker,Worcester,,
3,http://www.wikidata.org/entity/Q1031940,http://www.wikidata.org/entity/Q263350,Jake Berry,Rossendale and Darwen,,
4,http://www.wikidata.org/entity/Q1031751,http://www.wikidata.org/entity/Q263508,Clive Betts,Sheffield South East,,
...,...,...,...,...,...,...
664,http://www.wikidata.org/entity/Q751466,http://www.wikidata.org/entity/Q120780803,Steve Tuckwell,Uxbridge and South Ruislip,,
665,http://www.wikidata.org/entity/Q1031788,http://www.wikidata.org/entity/Q120780806,Keir Mather,Selby and Ainsty,,
666,http://www.wikidata.org/entity/Q1031884,http://www.wikidata.org/entity/Q122451364,Michael Shanks,Rutherglen and Hamilton West,,
667,http://www.wikidata.org/entity/Q988057,http://www.wikidata.org/entity/Q123113780,Sarah Edwards,Tamworth,,


In [315]:
for name in uk_geometry["name"].unique():
    print(name)

Aberavon
Aberconwy
Aberdeen North
Airdrie and Shotts
Aldershot
Aldridge-Brownhills
Altrincham and Sale West
Alyn and Deeside
Amber Valley
Angus
Arfon
Arundel and South Downs
Ashfield
Ashford
Ashton-under-Lyne
Aylesbury
Banbury
Barking
Barnsley Central
Barnsley East
Barrow and Furness
Basildon and Billericay
Basingstoke
Bassetlaw
Bath
Batley and Spen
Battersea
Beaconsfield
Beckenham
Bedford
Bermondsey and Old Southwark
Berwickshire Roxburgh and Selkirk
Bethnal Green and Bow
Beverley and Holderness
Bexhill and Battle
Bexleyheath and Crayford
Birkenhead
Birmingham Edgbaston
Birmingham Erdington
Birmingham Hall Green
Birmingham Hodge Hill
Birmingham Ladywood
Birmingham Northfield
Birmingham Perry Barr
Birmingham Selly Oak
Birmingham Yardley
Bishop Auckland
Blackburn
Blackley and Broughton
Blackpool North and Cleveleys
Blackpool South
Blaenau Gwent
Blaydon
Blyth Valley
Bognor Regis and Littlehampton
Bolsover
Bolton North East
Bolton South East
Bolton West
Bootle
Boston and Skegness
Bosworth

In [291]:
for name in district_df_cleaned["districtLabel"].unique():
    print(name)

Pudsey
Doncaster Central
Worcester
Rossendale and Darwen
Sheffield South East
Reigate
Staffordshire Moorlands
Exeter
Maldon
Cardiff West
North West Leicestershire
West Ham
Christchurch
Tunbridge Wells
Eddisbury
West Lancashire
Leyton and Wanstead
Swansea West
Gosport
Wallasey
Garston and Halewood
Sunderland Central
Ribble Valley
Chesham and Amersham
Surrey Heath
North Tyneside
Elmet and Rothwell
Brigg and Goole
Rugby
Hexham
Halesowen and Rowley Regis
Easington
Newport East
Amber Valley
Lincoln
Belfast West
East Worthing and Shoreham
Bromsgrove
Harwich and North Essex
South Down
Foyle
St Helens North
Brent Central
Orpington
Belfast East
Manchester Gorton
Newry and Armagh
South Antrim
Ealing Central and Acton
Croydon North
Ochil and South Perthshire
Bristol South
Aberdeen North
Dundee West
Edinburgh East
Midlothian
Bradford South
St Austell and Newquay
Fareham
Lewisham Deptford
Wealden
Wirral West
Sheffield Heeley
Swansea East
York Central
Plymouth Moor View
Belfast South
Erewash
Mid Dor

In [472]:
import shapely

# add new column to question_locations_df
question_locations_df["district"] = None

# https://stackoverflow.com/questions/7861196/check-if-a-geopoint-with-latitude-and-longitude-is-within-a-shapefile
for index, row in question_locations_df.iterrows():

    point = shapely.geometry.Point(row["easting"], row["northing"])
    for index2, row2 in MP_districts.iterrows():
        if row2["geometry"].contains(point):
            question_locations_df.at[index, "district"] = row2["districtLabel"]
            print(row["entity"])
            print(row["question"])
            print(row2["districtLabel"])
            print()
            break

Office_for_National_Statistics
https://id.parliament.uk/dmnAotxP
Newport West

West_Midlands_(region)
https://id.parliament.uk/wne3Q3kQ
South Staffordshire

Slough
https://id.parliament.uk/Q0uNVDhz
Slough

Cabinet_Office
https://id.parliament.uk/5utw5sA0
Cities of London and Westminster

Hounslow
https://id.parliament.uk/W7K6780e
Feltham and Heston

Buckingham_(UK_Parliament_constituency)
https://id.parliament.uk/e9R3Yc8s
Buckingham

Ofcom
https://id.parliament.uk/e9R3Yc8s
Bermondsey and Old Southwark



In [52]:
question_locations_df

Unnamed: 0,question,entity,easting,northing,wikipedia
0,https://id.parliament.uk/I8GPhExr,London,530050.067317,180360.858134,https://en.wikipedia.org/wiki/London
1,https://id.parliament.uk/O6dOod4v,Kirklees,413269.122922,410732.050719,https://en.wikipedia.org/wiki/Kirklees
2,https://id.parliament.uk/O6dOod4v,West_Yorkshire,422078.707593,428232.624284,https://en.wikipedia.org/wiki/West_Yorkshire
3,https://id.parliament.uk/P62u1XIZ,Parliament_of_the_United_Kingdom,530264.992787,179500.878758,https://en.wikipedia.org/wiki/Parliament_of_th...
4,https://id.parliament.uk/P62u1XIZ,Parliament_of_the_United_Kingdom,530264.992787,179500.878758,https://en.wikipedia.org/wiki/Parliament_of_th...
...,...,...,...,...,...
137,https://id.parliament.uk/c8kWFzzU,Office_for_National_Statistics,328820.179860,185811.595715,https://en.wikipedia.org/wiki/Office_for_Natio...
138,https://id.parliament.uk/yGBvvUNz,Sefton_Central_(UK_Parliament_constituency),334794.297724,403377.423780,https://en.wikipedia.org/wiki/Sefton_Central_(...
139,https://id.parliament.uk/mzMTPwdx,Office_for_National_Statistics,328820.179860,185811.595715,https://en.wikipedia.org/wiki/Office_for_Natio...
140,https://id.parliament.uk/4IV962Qo,Department_for_Transport,529961.029006,178954.030967,https://en.wikipedia.org/wiki/Department_for_T...
