# Project aim

**Fresquito** is a simple, web-based project which shows an interactive map with the coldest and hottest town in Spain, in real time. It accomplishes this by using web scraping and running on a local server (*Raspberry Pi 4B*) through **Flask**.

In [1]:
#Importing libraries.

import pandas as pd
import requests
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
import time
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
from tqdm import tqdm
from selenium.webdriver.chrome.options import Options
import shutup
shutup.please()
import lxml.html
import re
import threading
import datetime
import geopy
from geopy.geocoders import Nominatim
import folium

## Locating every town link

Before we can start actually scraping any data we must locate every town link. It won't be easy, but it must be done.

In [3]:
# Initiating our chromedriver

link = 'https://www.eltiempo.es/en-provincia-almeria/abejuela.html'
driver = webdriver.Chrome()
driver.get(link)

In [4]:
# Since most province link (which holds town links)

links_auto = []

links = driver.find_elements(By.XPATH,  "//a[contains(@href, 'provincia')]") # Grabbing every link containing 'provincia'

for i in links:
    links_auto.append(i.get_attribute('href')) # Appending the link to our list

In [58]:
# Some provinces don't have 'provincia' in the link, so we'll have to add them manually. Luckily they aren't many

links_manual = ['https://www.eltiempo.es/navarra',
                'https://www.eltiempo.es/melilla',
                'https://www.eltiempo.es/ceuta',
                'https://www.eltiempo.es/alava',
                'https://www.eltiempo.es/guipuzcoa',
                'https://www.eltiempo.es/vizcaya',
                'https://www.eltiempo.es/a-coruna',
                'https://www.eltiempo.es/ourense',
                'https://www.eltiempo.es/baleares',
                'https://www.eltiempo.es/la-rioja',
                'https://www.eltiempo.es/asturias',
                'https://www.eltiempo.es/murcia']

In [59]:
# Let's join all the links

provinces = links_auto + links_manual

In [60]:
# 52 provinces, as expected

len(provinces)

52

Now that we have all the province links it's time to access them and grab every town link.

In [162]:
links = []

for i in tqdm(provinces):
    driver.get(i)
    time.sleep(5)
    
    xpath_a = '//*[@id="main"]/div[4]/div/section[6]/article/div/div['
    xpath_b = ']/ul/li['
    xpath_c = ']/a' 
    
    for n in range(25):
        xpath_base = xpath_a + str(n) + xpath_b
        for p in range(1000):
            try:
                xpath_full = xpath_base + str(p) + xpath_c
                town_link = driver.find_element(By.XPATH, xpath_full).get_attribute('href')
                links.append(town_link)
            except:
                pass

100%|███████████████████████████████████████████████████████████████████████████████| 52/52 [4:45:32<00:00, 329.46s/it]


In [243]:
# Dropping duplicates.

links = list(set(links))

##  Creating dictionaries with town data

Using one of the site's network calls we can obtain a dictionary with all meteo data in real time, but first we'll have to find the **pelmorex_id** of every town so that we can perform the call. Let's use regex to create a dictionary containing **town url**, **name** and **pelmorex_id**.

In [305]:
# Grabbing the javascript text containing the town name and pelmorex_id.

script = soup.find_all('script', {'type': 'text/javascript'})[2].text
script

'\n        var current_location_info = {};\n                                            // prepare all the data for the PSS alert system\n        current_location_info[\'foreca_id\'] = "102522495";\n        current_location_info[\'urlized\'] = "en-provincia-almeria/abejuela";\n        current_location_info[\'pelmorex_id\'] = "ESXX16293";\n        current_location_info[\'name\'] = "Abejuela";\n        current_location_info[\'region_name\'] = "Almería";\n        current_location_info[\'country_name\'] = "";\n        current_location_info[\'pss\'] = "";\n                    current_location_info[\'url\'] = "/en-provincia-almeria/abejuela.html";\n                    current_location_info[\'weather\'] = "";\n        current_location_info[\'timestamp\'] = "";\n        current_location_info[\'city_page_pelmorex_id\'] = "ESXX16293";\n        current_location_info[\'is_gps\'] = false;\n                    '

In [312]:
# Isolating the palmorex_id.

result = re.search("""pelmorex_id\'] = "(.*)";\n        current_location""", script)
pelmorex_id = result.group(1)
pelmorex_id

'ESXX16293'

In [318]:
# Performing the same operation to extract the town name.

result = re.search("""name\'] = "(.*)";\n        current_location""", script)
town_name = result.group(1)
town_name

'Abejuela'

In [321]:
# And finally, province.

result = re.search("""region_name\'] = "(.*)";\n        current_location""", script)
province = result.group(1)
province

'Almería'

It seems like this approach *might* be viable. 

Now we need to visit every link from our list and, if it's a town, perform the following operations:

    - Parse page, grab town name
    - Grab town pelmorex_id
    - Grab town province
    - Create a dictionary with url + previous data
    - Store dictionary in a list

In [331]:
# Creating our loop.

town_dicts = [] # Holding the dictionaries containing all scraped data
fail = [] # Storing failed links
id_counter = [] # Storing IDs as to eliminate duplicate values.

for i in tqdm(links):
    page = requests.get(i) # Accessing the link.
    soup = BeautifulSoup(page.content, 'lxml') # Parsing it as usual
    # If the link points to a town we'll create and store the dictionary as follows
    try:
        if 'El tiempo en' in soup.find_all('h1')[0].get_text(): # Using the previous condition to filter the links
            try:
                script = soup.find_all('script', {'type': 'text/javascript'})[2].text # Locating the javascript text
                result = re.search("""pelmorex_id\'] = "(.*)";\n        current_location""", script)
                pelmorex_id = result.group(1) # Saving pelmorex_id
                result = re.search("""name\'] = "(.*)";\n        current_location""", script)
                town_name = result.group(1) # Town name
                result = re.search("""region_name\'] = "(.*)";\n        current_location""", script)
                province = result.group(1) # Province
                
                if pelmorex_id not in id_counter: # Checking if it's a duplicate.
                    # Now we can create and save the dictionary
                    town = {'url': i, 'pelmorex_id': pelmorex_id, 'name': town_name, 'province': province}
                    town_dicts.append(town) # Appending it to the list
                    id_counter.append(pelmorex_id)
                else:
                    pass
            except:
                fail.append(i) # If the loop fails we'll see at which url it crashed 

        else:
            pass
    except:
        print(i)
        pass

 97%|███████████████████████████████████████████████████████████████████████▍  | 23996/24859 [4:12:30<07:46,  1.85it/s]

https://www.eltiempo.es/martes.html


100%|██████████████████████████████████████████████████████████████████████████| 24859/24859 [4:21:33<00:00,  1.58it/s]


In [332]:
# Checking how many entries we have.

len(town_dicts)

24858

In [333]:
# Luckily it managed to scrape all links.

fail

[]

In [652]:
# An example of a dictionary.

town_dicts[0]

{'url': 'https://www.eltiempo.es/mallecina.html',
 'pelmorex_id': 'ESO1194',
 'name': 'Mallecina',
 'province': 'Asturias'}

In [338]:
# Let's save this list of dictionaries as a Pandas dataframe since we'll need it later.

town_index = pd.DataFrame(town_dicts)
town_index.to_csv('town_index.csv', index=False)

## Scraping all links using BS4

Now that we have all (24K+) individual links we can get to the best part: scraping them.

My first approach was to use a simple loop, but it was very inefficient and time-consuming.

In [430]:
# Please notice how the API url uses the pelmorex_id that we extracted earlier, instead of town name.

page = requests.get('https://www.eltiempo.es/api/v1/get_current_conditions_by_pelmorex_id/ESO1194')
soup = BeautifulSoup(page.content, 'lxml') # Parsing the content.
soup.text

'{"type":"observatory","timestamp":{"local":"2022-06-26 10:25","local_ts":1656239100,"local_date":"2022-06-26","local_year":"2022","local_month":"06","local_month_name":"june","local_day":"26","local_time":"10:25","local_weekday":"0","local_weekday_name":"sunday","utc":"2022-06-26 08:25","utc_ts":1656231900},"icon":{"standard":"d200","extended":"d200"},"temperature":{"c":11,"f":52},"feels_like":{"c":11,"f":52},"wind":{"direction":"SW","speed":{"kmh":4,"mph":2,"ms":1,"kt":2,"bft":1},"gust":{"kmh":6,"mph":4,"ms":2,"kt":3,"bft":1}},"humidity":{"percent":80},"pressure":{"hPa":1016,"trend":2},"dew_point":{"c":8,"f":46},"visibility":{"km":23},"ceiling":{"feet":5451,"meters":1662},"cloudiness":null,"pop":{"percent":30},"pot":{"percent":0},"rain":{"mm":0},"snow":{"cm":0},"temp_reliability":null,"wind_reliability":null,"sun":null,"location":{"province_name":"Asturias","province_urlized":"asturias","site":"Eltiempo.es","locale":"es"}}'

As we can see, the output contains all the relevant weather data that we could need. Let's test a loop.

In [653]:
# This loop scrapes every link and stores the needed data as a dictionary. Let's benchmark it with 1000 links.

start = time.time() # Starting our timer

meteo = []

for i in tqdm(town_dicts[:1000]): # Using tqdm to create a progress bar
    id = i['pelmorex_id']
    page = requests.get('https://www.eltiempo.es/api/v1/get_current_conditions_by_pelmorex_id/' + str(id))
    soup = eval(BeautifulSoup(page.content, 'lxml').text.replace('null', """'null'""")) # Replacing null
    temp_dict = {'pelmorex_id': id, 'temp': soup['temperature']['c'], 'timestamp': soup['timestamp']['local']} # Creating dict
    meteo.append(temp_dict) # Appending the dictionary
    
end = time.time()
print ("Time elapsed:", end - start) # Printing the elapsed time

100%|██████████████████████████████████████████████████████████████████████████████| 1000/1000 [06:10<00:00,  2.70it/s]

Time elapsed: 370.86693263053894





In [654]:
meteo[:10]

[{'pelmorex_id': 'ESO1194', 'temp': 15, 'timestamp': '2022-06-26 18:55'},
 {'pelmorex_id': 'ESXX9534', 'temp': 16, 'timestamp': '2022-06-26 19:05'},
 {'pelmorex_id': 'ESXX16949', 'temp': 30, 'timestamp': '2022-06-26 19:05'},
 {'pelmorex_id': 'ESXX11553', 'temp': 14, 'timestamp': '2022-06-26 19:05'},
 {'pelmorex_id': 'ESXX12715', 'temp': 19, 'timestamp': '2022-06-26 18:05'},
 {'pelmorex_id': 'ESXX10487', 'temp': 14, 'timestamp': '2022-06-26 19:05'},
 {'pelmorex_id': 'ESXX1741', 'temp': 21, 'timestamp': '2022-06-26 19:05'},
 {'pelmorex_id': 'ESXX0302', 'temp': 26, 'timestamp': '2022-06-26 18:55'},
 {'pelmorex_id': 'ESXX4751', 'temp': 29, 'timestamp': '2022-06-26 18:55'},
 {'pelmorex_id': 'ESXX11540', 'temp': 17, 'timestamp': '2022-06-26 19:05'}]

As we can see, it would take more than two hours to scrape every link. Absolutely unacceptable.

### Using multithreading to speed up the scraping

Since web scraping isn't very CPU intensive but requires some waiting until the page throws a response, multithreading holds great promise.

In this section we'll use the previously developed procedures and incorporate multithreading into the process.

In [822]:
start = time.time()

meteo = []    

def fetch_links(i):
    id = i['pelmorex_id']
    page = requests.get('https://www.eltiempo.es/api/v1/get_current_conditions_by_pelmorex_id/' + str(id))
    soup = eval(BeautifulSoup(page.content, 'lxml').text.replace('null', """'null'"""))
    temp_dict = {'pelmorex_id': id, 'temp': soup['temperature']['c'], 'timestamp': soup['timestamp']['local']}
    meteo.append(temp_dict)

threads = [threading.Thread(target=fetch_links, args=(i,))
           for i in town_dicts[:1000]] # Using multithreading to access all links

for t in threads:
    t.start() # Starting the action

time.sleep(2)

end = time.time()
print ("Time elapsed:", end - start)

Time elapsed: 5.214828968048096


As we can see, we got an enormous improvement by using multithreading. Let's now test it with the whole list of links.

In [656]:
# Since we can't access all links at once since we did before or we'll run into timeout issues, we'll have to 
# splice the work into 1000 link segments. We can accomplish this using two simple counters that are updated at every run
# and stop once all links have been accessed.

start = time.time()

meteo = []  
a = 0 # First counter
b = 1000 # Second counter

while b < 27000: # Once b passes this threshold, the sraping will stop
    def fetch_links(i):
        id = i['pelmorex_id']
        page = requests.get('https://www.eltiempo.es/api/v1/get_current_conditions_by_pelmorex_id/' + str(id))
        soup = eval(BeautifulSoup(page.content, 'lxml').text.replace('null', """'null'"""))
        temp_dict = {'pelmorex_id': id, 'temp': soup['temperature']['c'], 'timestamp': soup['timestamp']['local']}
        meteo.append(temp_dict)

    threads = [threading.Thread(target=fetch_links, args=(i,))
               for i in town_dicts[a:b]] # Accessing links indexed from a to b (0 to 1000)

    for t in threads:
        t.start()
    a += 1000 # Next run we'll scrape links from 1000 to 2000
    b += 1000
    time.sleep(2) # Sleeping for 2s so that all requests can be fulfilled on time
    
time.sleep(2)

end = time.time()
print ("Time elapsed:", end - start)

Time elapsed: 137.5862033367157


In [None]:
# Finally, saving the result as a dataframe

meteo_df = pd.DataFrame(meteo)

Now we can scrape every link in a mere *2 minutes*. **Success!!**

### Locating the coldest (and hottest) towns

Now that we have a dataframe containing town data (name, url, province, pelmorex_id) and another one that contains the latest temperature it's simply a matter of joining them together using *merge*.

In [658]:
town_index.head(1)

Unnamed: 0,url,pelmorex_id,name,province
0,https://www.eltiempo.es/mallecina.html,ESO1194,Mallecina,Asturias


In [659]:
meteo_df.head(1)

Unnamed: 0,pelmorex_id,temp,timestamp
0,ESXX12097,12,2022-06-26 18:35


As we can see, both dataframes share the **pelmorex_id** column. Let's join them.

In [660]:
output = pd.merge(town_index, meteo_df, left_on='pelmorex_id', right_on='pelmorex_id', how='left')
output.head()

Unnamed: 0,url,pelmorex_id,name,province,temp,timestamp
0,https://www.eltiempo.es/mallecina.html,ESO1194,Mallecina,Asturias,15,2022-06-26 18:35
1,https://www.eltiempo.es/arcocha.html,ESXX9534,Arcocha,Vizcaya,15,2022-06-26 18:35
2,https://www.eltiempo.es/torrejon.html,ESXX16949,Torrejón,Teruel,30,2022-06-26 18:35
3,https://www.eltiempo.es/pousacarro.html,ESXX11553,Pousacarro,A Coruña,14,2022-06-26 18:35
4,https://www.eltiempo.es/lomo-pelado.html,ESXX12715,Lomo Pelado,Tenerife,18,2022-06-26 17:35


In [None]:
# Saving the dataframe

output.to_csv('output.csv', index=False)

In [665]:
# Finally, let's find the coldest temperature in the country

output['temp'].min() # Obtaining coldest temperature

5

In [666]:
# Now let's see which towns share this temperature

output[output['temp'] == 5]

Unnamed: 0,url,pelmorex_id,name,province,temp,timestamp
573,https://www.eltiempo.es/oblanca.html,ESXX18135,Oblanca,León,5,2022-06-26 18:35
951,https://www.eltiempo.es/saguera-de-luna.html,ESXX17503,Sagüera de Luna,León,5,2022-06-26 18:35
1909,https://www.eltiempo.es/folledo.html,ESXX19146,Folledo,León,5,2022-06-26 18:35
3721,https://www.eltiempo.es/san-martin-de-la-terci...,ESXX17380,San Martín de la Tercia,León,5,2022-06-26 18:35
3839,https://www.eltiempo.es/pontedo.html,ESXX17855,Pontedo,León,5,2022-06-26 18:35
3870,https://www.eltiempo.es/cabornera.html,ESXX19743,Cabornera,León,5,2022-06-26 18:35
4380,https://www.eltiempo.es/casares-de-arbas.html,ESXX19607,Casares de Arbas,León,5,2022-06-26 18:25
8198,https://www.eltiempo.es/la-vega-de-robledo.html,ESXX18578,La Vega de Robledo,León,5,2022-06-26 18:35
8235,https://www.eltiempo.es/camplongo.html,ESXX19692,Camplongo,León,5,2022-06-26 18:35
8886,https://www.eltiempo.es/piorneda.html,ESXX17881,Piorneda,León,5,2022-06-26 18:35


In [667]:
# Now let's find the hottest towns

output[output['temp'] == output['temp'].max()] # Combining the previous 2 steps into a single dataframe query

Unnamed: 0,url,pelmorex_id,name,province,temp,timestamp
1052,https://www.eltiempo.es/en-provincia-almeria/s...,ESXX8598,Santa Cruz,Almería,35,2022-06-26 18:35
1927,https://www.eltiempo.es/santa-cruz-de-marchena...,ESXX7533,Santa Cruz de Marchena,Almería,35,2022-06-26 18:35
5882,https://www.eltiempo.es/alsodux.html,ESXX5111,Alsodux,Almería,35,2022-06-26 18:35
6578,https://www.eltiempo.es/bentarique.html,ESXX7532,Bentarique,Almería,35,2022-06-26 18:35
7355,https://www.eltiempo.es/alhabia.html,ESXX21068,Alhabia,Almería,35,2022-06-26 18:35
11257,https://www.eltiempo.es/huecija.html,ESXX2208,Huécija,Almería,35,2022-06-26 18:35
14831,https://www.eltiempo.es/alicun.html,ESXX21067,Alicún,Almería,35,2022-06-26 18:35
15335,https://www.eltiempo.es/alboloduy.html,ESXX0993,Alboloduy,Almería,35,2022-06-26 18:35
16052,https://www.eltiempo.es/terque.html,ESXX7502,Terque,Almería,35,2022-06-26 18:35


## Turning the process into a loop 

Since my intention is to host this loop in a **Raspberry Pi 4B**, we'll have to pack it up nicely beforehand. Let's combine all previous processes into a single loop that also prints out every time it runs successfully.

This loop will need to perform the following functions:

    - Load the town_index csv.
    - Scrape all town links.
    - Generate a new meteo dataframe.
    - Join it with the previously loaded DF.
    - Save the resulting dataframe.
    - Announce at which time the loop ran.
    

In [27]:
def dataframe_generator():
    try:
        town_index = pd.read_csv('town_index.csv') #Loading the town_index csv

        # Storing all pelmorex_id as a list
        pelmorex_list = town_index['pelmorex_id'].tolist()

        # Scraping all links and holding the data as a list of dictionaries.
        meteo = []  
        a = 0 
        b = 1000 

        while b < 27000: 
            def fetch_links(n):
                id = n
                page = requests.get('https://www.eltiempo.es/api/v1/get_current_conditions_by_pelmorex_id/' + str(id))
                soup = eval(BeautifulSoup(page.content, 'lxml').text.replace('null', """'null'"""))
                temp_dict = {'pelmorex_id': id, 'temp': soup['temperature']['c'], 'timestamp': soup['timestamp']['local']}
                meteo.append(temp_dict)

            threads = [threading.Thread(target=fetch_links, args=(i,))
                       for i in pelmorex_list[a:b]] 

            for t in threads:
                t.start()
            a += 1000
            b += 1000
            time.sleep(2)

        time.sleep(2)

        # Saving the output as a dataframe
        meteo_df = pd.DataFrame(meteo)

        # Merging the dataframes and saving the output
        output = pd.merge(town_index, meteo_df, left_on='pelmorex_id', right_on='pelmorex_id', how='left')
        output.to_csv('output.csv', index=False)

        now = datetime.datetime.now()
        print('New DF generated successfully on', str(now.day)+'-'+str(now.month)+'-'+str(now.year), 'at', str(now.hour)+':'+str(now.minute))
        return output
    except:
        now = datetime.datetime.now()
        print('Process FAILED on', str(now.day)+'-'+str(now.month)+'-'+str(now.year), 'at', str(now.hour)+':'+str(now.minute))
        return False

In [28]:
output = dataframe_generator()

New DF generated successfully on 27-6-2022 at 21:18


## Displaying the coldest and hottest towns in a map

Now that we have the scraping nicely packed into a loop, it's time to create our map. For this purpose we'll use three main elements:

    1. Data manipulation to obtain the coldest/hottest towns and select them randomly (if they're >1).
    2. Geocoding to obtain the town coordinates.
    3. Folium to display the map with both towns.

In [4]:
# Obtaining a dataframe containing the coldest and hottest towns. We're reusing the previous code.

coldest = output[output['temp'] == output['temp'].min()]
hottest = output[output['temp'] == output['temp'].max()]

In [10]:
coldest.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 57 entries, 261 to 24701
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   url          57 non-null     object 
 1   pelmorex_id  57 non-null     object 
 2   name         57 non-null     object 
 3   province     57 non-null     object 
 4   temp         57 non-null     float64
 5   timestamp    57 non-null     object 
dtypes: float64(1), object(5)
memory usage: 3.1+ KB


In [8]:
hottest.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1 entries, 8875 to 8875
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   url          1 non-null      object 
 1   pelmorex_id  1 non-null      object 
 2   name         1 non-null      object 
 3   province     1 non-null      object 
 4   temp         1 non-null      float64
 5   timestamp    1 non-null      object 
dtypes: float64(1), object(5)
memory usage: 56.0+ bytes


We can see how the coldest and hottest temperatures might be shared among several towns.

The next step is using geocoding to obtain the coordinates of a randomly selected town from each subset. We know that geocoding doesn't always work, so we'll first apply it to the **town_index** dataframe and save the coordinates as a new column. The rows (towns) that cannot be reliably geocoded will be dropped.

In [11]:
# This function will try to geocode a given town, otherwise it returns False.

def geocoder(town_name, province_name):
    try:
        ubi = town_name + ', ' + province_name + ', ' + 'Spain'
        locator = Nominatim(user_agent='myGeocoder')
        location = locator.geocode(ubi)
        return (location.latitude, location.longitude)
    except:
        return False

In [12]:
#This new column will hold the town coordinates if geocoding is possible

town_index = pd.read_csv('town_index.csv') # Loading the dataframe
town_index['coords'] = ''

In [15]:
# Applying the function to every row

for i in tqdm(range(len(town_index))): # Iterating through every row (hideous, I know, but 'apply' isn't any faster here)
    town_name = town_index['name'].iloc[i]
    province_name = town_index['province'].iloc[i]
    ubi = geocoder(town_name, province_name) # Applying the function
    town_index['coords'].iloc[i] = ubi # Storing the coords

100%|██████████████████████████████████████████████████████████████████████████| 24858/24858 [3:27:55<00:00,  1.99it/s]


In [21]:
# Now let's perform some simple operations

#Filtering the DF and keeping only rows with coordinates
town_index = town_index[town_index['coords'] != False]

# Resetting the index since we deleted rows
town_index.reset_index(drop=True)

# Saving it
town_index.to_csv('town_index.csv', index=False)

## Map creation

Now that out dataframe contains town coordinates we can finally plot both towns in a map. First of all, we'll select them randomly.

In [49]:
# Getting the coldest/hottest towns again

coldest = output[output['temp'] == output['temp'].min()]
hottest = output[output['temp'] == output['temp'].max()]

In [50]:
# Selecting one of the coldest towns.

coldest.sample()

Unnamed: 0,url,pelmorex_id,name,province,coords,temp,timestamp
12253,https://www.eltiempo.es/mallolis.html,ESXX11059,Mallolís,Lleida,"(42.4958721, 1.2580179)",6,2022-06-27 21:05


In [42]:
#Same with the hot ones.

hottest.sample()

Unnamed: 0,url,pelmorex_id,name,province,temp,timestamp
8875,https://www.eltiempo.es/baleares/masella.html,ESCM0001,Masella,Baleares,35.0,2022-06-27 15:25


Let's now display both points on a map. We'll use some custom icons and different colors to differentiate them.

In [52]:
c = coldest.sample()
h = hottest.sample()

# Creating a map centered on Spain

temp_map = map = folium.Map(location=[40.4165, -3.70256], zoom_start=6.5)

# Adding coldest town.

folium.Marker(location= eval(c['coords'].iloc[0]), 
              popup= c['name'].iloc[0] + ', ' + c['province'].iloc[0]+'\n'+str(c['temp'].iloc[0])+'ºC',
              icon=folium.Icon(color='blue',icon='glyphicon glyphicon-cloud')
              ).add_to(temp_map)

# And now the hottest.

folium.Marker(location= eval(h['coords'].iloc[0]), 
              popup= h['name'].iloc[0] + ', ' + h['province'].iloc[0]+'\n'+str(h['temp'].iloc[0])+'ºC',
              icon=folium.Icon(color='red', icon='glyphicon glyphicon-fire')
              ).add_to(temp_map)

# Saving the map as an html file

temp_map.save(outfile= "map.html")

In [53]:
# Displaying the map

temp_map

## Joining the whole process in a single function

Since we want to host this website on a **Raspberry**, it'll need to be fed a new *html* file containing the map at set intervals. The best way to accomplish this is to pack the whole process into a function and then run it as needed.

In [5]:
#Importing libraries.

import pandas as pd
import requests
from bs4 import BeautifulSoup
import time
import shutup
shutup.please()
import lxml.html
import re
import threading
import datetime
import geopy
from geopy.geocoders import Nominatim
import folium

def main():
    try:
        def dataframe_generator():
            town_index = pd.read_csv('town_index.csv') #Loading the town_index csv

            # Storing all pelmorex_id as a list
            pelmorex_list = town_index['pelmorex_id'].tolist()

            # Scraping all links and holding the data as a list of dictionaries.
            meteo = []  
            a = 0 
            b = 500 
            
            def fetch_links(n):
                try:
                    id = n
                    page = requests.get('https://www.eltiempo.es/api/v1/get_current_conditions_by_pelmorex_id/' + str(id))
                    soup = eval(BeautifulSoup(page.content, 'lxml').text.replace('null', """'null'"""))
                    temp_dict = {'pelmorex_id': id, 'temp': soup['temperature']['c'], 'timestamp': soup['timestamp']['local']}
                    meteo.append(temp_dict)
                except:
                    pass
                
            while b < 22500: 


                threads = [threading.Thread(target=fetch_links, args=(i,))
                           for i in pelmorex_list[a:b]] 

                for t in threads:
                    t.start()
                a += 500
                b += 500
                time.sleep(0.5)

            time.sleep(2)

            # Saving the output as a dataframe
            meteo_df = pd.DataFrame(meteo)

            # Merging the dataframes and saving the output
            output = pd.merge(town_index, meteo_df, left_on='pelmorex_id', right_on='pelmorex_id', how='left')
            output.to_csv('output.csv', index=False)

            return output


        # Generating the dataframe
        output = dataframe_generator()

        # Locating the coldest/hottest towns and randomizing the selection
        c = output[output['temp'] == output['temp'].min()].sample()
        h = output[output['temp'] == output['temp'].max()].sample()

        # Creating the map and saving it
        temp_map = map = folium.Map()

        folium.Marker(location= eval(c['coords'].iloc[0]), 
                      popup= c['name'].iloc[0] + ', ' + c['province'].iloc[0]+'\n'+str(c['temp'].iloc[0])+'ºC',
                      icon=folium.Icon(color='blue', icon='glyphicon glyphicon-cloud')
                      ).add_to(temp_map)

        folium.Marker(location= eval(h['coords'].iloc[0]), 
                      popup= h['name'].iloc[0] + ', ' + h['province'].iloc[0]+'\n'+str(h['temp'].iloc[0])+'ºC',
                      icon=folium.Icon(color='red', icon='glyphicon glyphicon-fire')
                      ).add_to(temp_map)
        
        temp_map.fit_bounds([eval(c['coords'].iloc[0]), eval(h['coords'].iloc[0])])

        # Exporting the map with the required name for NGINX
        temp_map.save(outfile= "index.nginx-debian.html")

        # This block prints a success/failure message every time the function runs
        now = datetime.datetime.now()
        print('Map generated successfully on', str(now.day)+'-'+str(now.month)+'-'+str(now.year), 'at', str(now.hour)+':'+str(now.minute))
        print('\n')
    except:
        now = datetime.datetime.now()
        print('Process FAILED on', str(now.day)+'-'+str(now.month)+'-'+str(now.year), 'at', str(now.hour)+':'+str(now.minute))
        print('\n')
        pass
        

print('Main function running')
print('\n')
print('\n')

# Running the main function
main()

Main function running




Map generated successfully on 25-7-2022 at 19:31


