In [1]:
from SPARQLWrapper import SPARQLWrapper, JSON
import pandas as pd
import requests

<div style="background-color: #fdfd96; padding: 20px; border-radius: 8px; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);">
    
## Which UNESCO World Heritage Sites are located within 50 km of major cities with populations over 2.5 million, and what are the primary languages spoken in these cities?

# 1. UNESCO Dataset creation

In [2]:
endpoint_url = "https://query.wikidata.org/sparql"
sparql = SPARQLWrapper(endpoint_url)
query = """
SELECT ?site ?siteLabel ?location WHERE {
  ?site wdt:P31 wd:Q9259;   # Heritage site instance
        wdt:P625 ?location.  # Geographical coordinates
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
"""
sparql.setQuery(query)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()
data = []
for result in results["results"]["bindings"]:
    site = result["site"]["value"]
    site_label = result["siteLabel"]["value"]
    location = result["location"]["value"]
    data.append({"Site": site, "SiteLabel": site_label, "Location": location})

heritage_df = pd.DataFrame(data)

# 2. Cities with over 1000 citizens Dataset importation

In [3]:
url = "https://raw.githubusercontent.com/nabilashraf/cities1000/master/cities1000.txt"
response = requests.get(url)
with open("cities1000.txt", "wb") as file:
    file.write(response.content)
cities_df = pd.read_csv("cities1000.txt", sep="\t", header=None, encoding="utf-8")

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


# 3. Cleaning

In [10]:
heritage_df_cleaned = heritage_df[['SiteLabel', 'Location']] 
cities_df_cleaned = cities_df[[1, 2, 3, 4, 5, 14]]  # Colonne 1=ID, 2=Nom de la ville, 4=Longitude, 5=Latitude, 14=Population
cities_df_cleaned.columns = ['CityID', 'CityName', 'OtherInfo', 'Longitude', 'Latitude', 'Population']
cities_df_cleaned['Location'] = 'POINT(' + cities_df_cleaned['Longitude'].astype(str) + ' ' + cities_df_cleaned['Latitude'].astype(str) + ')'
cities_df_final = cities_df_cleaned[['CityName', 'Population', 'Location', 'Latitude', 'Longitude']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cities_df_cleaned['Location'] = 'POINT(' + cities_df_cleaned['Longitude'].astype(str) + ' ' + cities_df_cleaned['Latitude'].astype(str) + ')'


# 4. Map Visualization

In [4]:
result_df = pd.read_csv("SPARQL_GraphDB_RESULTS.csv")

In [13]:
import folium
heritage_df_cleaned[['Longitude', 'Latitude']] = heritage_df_cleaned['Location'].str.extract(r'Point\((-?\d+\.\d+) (-?\d+\.\d+)\)').astype(float)

mymap = folium.Map(location=[20, 0], zoom_start=2)

for _, row in result_df.iterrows():
    city_data = cities_df_final[cities_df_final['CityName'] == row['CityName']].iloc[0]
    city_latitude = city_data['Longitude']
    city_longitude = city_data['Latitude']
    site_data = heritage_df_cleaned[heritage_df_cleaned['SiteLabel'] == row['UNESCO Site']].iloc[0]
    site_latitude = site_data['Latitude']
    site_longitude = site_data['Longitude']
    folium.Marker(
        location=[city_latitude, city_longitude],
        popup=f"<b>City:</b> {row['CityName']}<br><b>Population:</b> {row['Population']}<br><b>Language:</b> {row['Language']}",
        icon=folium.Icon(color="blue", icon="info-sign"),
    ).add_to(mymap)
    folium.Marker(
        location=[site_latitude, site_longitude],
        popup=f"<b>UNESCO Site:</b> {row['UNESCO Site']}<br><b>Distance (km):</b> {row['Distance (km)']:.2f}",
        icon=folium.Icon(color="green", icon="globe"),
    ).add_to(mymap)
    folium.PolyLine(
        locations=[
            [city_latitude, city_longitude],
            [site_latitude, site_longitude],
        ],
        color="red",
        weight=2,
    ).add_to(mymap)

mymap.save("map_results.html")
mymap