## Scraping with Selenium
`! pip install selenium` <br>
`! pip install webdriver-manager`

In [13]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
import time
import pathlib
import pickle
import re

## Getting Descriptions
Each ad has a URL that leads to a page with more information about the property. We want to scrape that page.


In [2]:
# Load the listings previously saved for the first results page.
# NOTE: unpickling can execute arbitrary code, so only load pickle files
# that you created yourself or otherwise trust.
with open("results/malaga_page_1.pickle",'rb') as f:
    data = pickle.load(f, encoding='utf-8')

In [3]:
# Preview the first three listings (each one is a dict).
data[:3]

[{'title': 'Modern Sea View Apartments with Spacious Terraces in Fuengirola',
  'desc': 'The sea-view apartments with large terraces are situated in a prestigious community in Fuengirola, Costa del Sol. The gated and secured complex has amazing facilities and social and sports clubs.',
  'price': 'FROM\n€435.000',
  'details': ['FUENGIROLA - MÁLAGA', '1, 2, 3', '1, 2'],
  'url': 'https://spainhomes.com/ad/agp-0732-new-build-apartments-with-sea-views-in-prime-area-of-fuengirola'},
 {'title': 'Spacious Villa with Panoramic Sea Views in Benalmadena',
  'desc': 'Villa with panoramic sea views is situated in Benalmadena, Costa del Sol. A spacious three-bedroom villa has a generous garden and a private swimming pool.',
  'price': '€1.395.000',
  'details': ['BENALMÁDENA - MÁLAGA', '3', '3'],
  'url': 'https://spainhomes.com/ad/agp-0785-villa-with-excellent-location-and-sea-view-in-benalmadena'},
 {'title': 'Newly Built Villa with an Appealing Design in Mijas',
  'desc': 'The villa is situate

In [27]:
# Use the detail-page URL of the second listing (index 1) as the working example.
url = data[1]["url"]
url

'https://spainhomes.com/ad/agp-0785-villa-with-excellent-location-and-sea-view-in-benalmadena'

## Task 1:
Retrieve the information under 'General Details' and obtain the 'Summary', focusing on the square meters ($m^2$), number of bedrooms, and number of bathrooms. It would also be good to capture additional information when available, such as the presence of a pool, the distance to the beach, and the energy efficiency. <br>
You might need to use string manipulation or simple regular expressions to clean up your features.

In [28]:
# Launch Chrome, passing the result of webdriver-manager's install() to the Service.
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.maximize_window()
# Navigate to the example listing's detail page.
driver.get(url)

In [11]:
# Sample of possible raw features.
# NOTE(review): `features` is not assigned in any cell shown here; presumably
# it is produced by the Task 1 solution -- confirm before running top to bottom.
features

['REF.\nAGP-0785',
 'Realtor Fee\nFREE',
 'Location\nMálaga / Benalmádena',
 'Total Floors\n2',
 'Bedrooms\n3',
 'Bathrooms\n3',
 'Pool\nPrivate Pool',
 'Completion Date\n6 / 2024',
 'Distance to Airport\n0-50 km',
 'Distance to Beach\n1-5 km',
 '']

In [12]:
# Close the browser and end the WebDriver session once scraping is done.
driver.quit()

In [26]:
# Sample of some of the clean features.
# NOTE(review): `get_features_from_raw` and `size` are not defined in any cell
# shown here; presumably they belong to the Task 1 solution -- confirm.
get_features_from_raw(size, features)

{'size': 243,
 'Realtor Fee': 'FREE',
 'Location': 'Málaga / Benalmádena',
 'Total Floors': '2',
 'Bedrooms': '3',
 'Bathrooms': '3',
 'Pool': 'Private Pool',
 'Completion Date': '6 / 2024',
 'Distance to Airport': '0-50 km',
 'Distance to Beach': '1-5 km'}

## Task 2:
Write a function that, given the driver as input, returns a dictionary with all the new info.

In [41]:
# Sample of updated info.
# NOTE(review): `get_more_info` is not defined in any cell shown here, and an
# earlier cell calls driver.quit(); on a top-to-bottom run a fresh driver
# session presumably has to be started before this call -- confirm.
get_more_info(driver)

{'size': 243,
 'Realtor Fee': 'FREE',
 'Location': 'Málaga / Benalmádena',
 'Total Floors': '2',
 'Bedrooms': '3',
 'Bathrooms': '3',
 'Pool': 'Private Pool',
 'Completion Date': '6 / 2024',
 'Distance to Airport': '0-50 km',
 'Distance to Beach': '1-5 km',
 'desc': 'Well-Located and Stylish Villa with Sea View in Benalmadena This exclusive villa is located in Benalmadena, a coastal town in the southern region of Andalusia, Costa del Sol. The villa is situated on a picturesque hilltop part of Benalmadena, offering breathtaking views of the Mediterranean Sea. It is considered among the best places to buy villa in Benalmadena. It is a cosmopolitan town that is known for its stunning beaches and numerous tourist attractions. The villa has an excellent location close to all amenities. The villa is 1,9 km from the beach, 5,9 km from Torrequebrada golf course, 21 km from Malaga International Airport, and 38 km from Marbella. To enjoy the outdoors, the villa has a private garden, that seamles

## Task 3
Write a function that, given an element of `data`, updates it with the new information using the previously defined functions. This function should run the driver to get the new info before updating.

In [46]:
# The listing as originally loaded, shown for comparison with the updated version.
basic_info = data[1]
basic_info

{'title': 'Spacious Villa with Panoramic Sea Views in Benalmadena',
 'desc': 'Villa with panoramic sea views is situated in Benalmadena, Costa del Sol. A spacious three-bedroom villa has a generous garden and a private swimming pool.',
 'price': '€1.395.000',
 'details': ['BENALMÁDENA - MÁLAGA', '3', '3'],
 'url': 'https://spainhomes.com/ad/agp-0785-villa-with-excellent-location-and-sea-view-in-benalmadena'}

In [47]:
# NOTE(review): `update_info` is not defined in any cell shown here; presumably
# it is the Task 3 solution -- confirm.
update_info(data[1])

{'title': 'Spacious Villa with Panoramic Sea Views in Benalmadena',
 'desc': 'Well-Located and Stylish Villa with Sea View in Benalmadena This exclusive villa is located in Benalmadena, a coastal town in the southern region of Andalusia, Costa del Sol. The villa is situated on a picturesque hilltop part of Benalmadena, offering breathtaking views of the Mediterranean Sea. It is considered among the best places to buy villa in Benalmadena. It is a cosmopolitan town that is known for its stunning beaches and numerous tourist attractions. The villa has an excellent location close to all amenities. The villa is 1,9 km from the beach, 5,9 km from Torrequebrada golf course, 21 km from Malaga International Airport, and 38 km from Marbella. To enjoy the outdoors, the villa has a private garden, that seamlessly blends with its surroundings and has a private pool. The idyllic location offers stunning sea views and tranquil living space close to all amenities. The villa has an open-plan kitchen w

## Task 4: 
Write a function that, given a path like "results/malaga_page_1.pickle", updates the data for every listing and saves the new information to a path like "detailed_results/malaga_page_1.pickle".

In [52]:
# Input pickle for one results page, and the directory for the detailed output.
path = pathlib.Path("results/malaga_page_1.pickle")
results_dir = pathlib.Path("detailed_results")
# .name is the final path component (the file name without its directory).
path.name

'malaga_page_1.pickle'

In [53]:
# Keep the same file name but place it under the detailed-results directory.
new_path = results_dir/path.name
new_path

PosixPath('detailed_results/malaga_page_1.pickle')

In [69]:
# Create the output directory in pure Python: unlike `! mkdir`, this works on
# every platform and does not fail when the directory already exists, so the
# cell can be re-run safely.
pathlib.Path("detailed_results").mkdir(exist_ok=True)

In [63]:
# NOTE(review): this repeats the earlier cell that builds `new_path`; it looks
# like a leftover from re-running cells and could probably be removed -- confirm.
new_path = results_dir/path.name
new_path

PosixPath('detailed_results/malaga_page_1.pickle')

In [70]:
def dump_page(new_data, new_path):
    """Pickle ``new_data`` to ``new_path`` and print the destination path.

    Parameters
    ----------
    new_data : object
        Any picklable object to store.
    new_path : str or os.PathLike
        Destination file. Its parent directory is created if it does not
        exist yet, so the function also works on a fresh checkout.

    Returns
    -------
    None

    Raises
    ------
    Any exception raised by ``open`` or ``pickle.dump`` is propagated; the
    file handle is closed in either case.
    """
    # Make sure the target directory exists instead of failing with FileNotFoundError.
    pathlib.Path(new_path).parent.mkdir(parents=True, exist_ok=True)
    # The context manager closes the file even when pickling raises.
    with open(new_path, 'wb') as file:
        pickle.dump(new_data, file)
    print(new_path)
# NOTE(review): `new_data` is not assigned in any cell shown here; presumably
# it is the list of updated listings produced by the Task 4 solution -- confirm.
dump_page(new_data, new_path)

detailed_results/malaga_page_1.pickle


In [71]:
# Collect every entry found directly inside the "results" directory.
all_files = list(pathlib.Path("results").iterdir())

In [73]:
# Sort the paths in place so the files are processed in a stable, predictable order.
all_files.sort()