In [11]:
import json
from urllib.parse import urlencode

import requests


# --------------------------- Preprocessing ---------------------------
category_attractions = "attraction"
location = ""  # Update this with your desired location
name = ""      # Update this with your desired name

# Build the Tour-pedia query URLs for attractions and their reviews.
# urlencode percent-encodes the values, so a location or name containing
# spaces, '&', '#', etc. cannot corrupt the query string. With the empty
# defaults above it yields exactly "category=attraction&location=&name=".
_tourpedia_query = urlencode(
    {"category": category_attractions, "location": location, "name": name}
)
url_attractions = f"http://tour-pedia.org/api/getPlaces?{_tourpedia_query}"
url_attraction_reviews = f"http://tour-pedia.org/api/getReviews?{_tourpedia_query}"

# Sent with every API request to ask for a JSON response body.
headers = {"accept": "application/json"}

# Function to send requests and save responses to JSON files
def fetch_and_save(url, filename, timeout=30):
    """Download a JSON document from ``url`` and pretty-print it to ``filename``.

    The request is sent with the module-level ``headers``. Every failure mode
    (transport error, non-200 status, empty body, undecodable JSON) is reported
    with ``print`` rather than raised, so the calling cell keeps running.

    Args:
        url: Address to send the GET request to.
        filename: Path of the JSON file to (over)write on success.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled server.

    Returns:
        True if the data was written to ``filename``, False otherwise.
    """
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors, DNS failures and timeouts land here.
        print(f"Failed to retrieve data from {url}. Error: {exc}")
        return False

    if response.status_code != 200:
        print(f"Failed to retrieve data from {url}. Status code: {response.status_code}, Response: {response.text[:100]}")
        return False

    # A 200 with a blank body would otherwise surface as a confusing decode error.
    if not response.text.strip():
        print(f"Empty response from {url}. No data saved.")
        return False

    try:
        data = response.json()
    except ValueError:
        # ValueError is the common base of the decode errors raised by both the
        # stdlib json module and simplejson (whichever requests is using).
        print(f"Failed to decode JSON from {url}. Response content: {response.text[:100]}")
        return False

    with open(filename, 'w') as json_file:
        json.dump(data, json_file, indent=4)  # Pretty-print with indent=4
    print(f"Data has been saved to {filename}")
    return True


# --------------------------- Processing files and general clean up of data ---------------------------

# NOTE(review): these imports would ideally live in the notebook's first cell;
# they are kept here so this cell still runs on its own.
import pandas as pd
from urllib.request import urlopen
import json

# Upper bound (seconds) on each per-review "details" request so one stalled
# connection cannot hang the whole loop.
DETAILS_TIMEOUT_SECONDS = 30


def _write_records_json(df, path):
    """Write ``df`` to ``path`` as a pretty-printed JSON array of row records."""
    # Serialise through pandas first, then re-dump with the json module for
    # indentation (same two-step approach for every file written by this cell).
    records = json.loads(df.to_json(orient='records', lines=False))
    with open(path, 'w') as json_file:
        json.dump(records, json_file, indent=4)


# Fetch and save the raw attractions and their reviews
fetch_and_save(url_attractions, 'attractions.json')
fetch_and_save(url_attraction_reviews, 'attraction_reviews.json')

attraction_reviews_df = pd.read_json("attraction_reviews.json")
attractions_df = pd.read_json("attractions.json")

# errors='coerce' turns unparsable ratings into NaN, which then fail the
# ">= 5" comparison below and are filtered out.
attraction_reviews_df['rating'] = pd.to_numeric(attraction_reviews_df['rating'], errors='coerce')

# Keep only reviews with rating >= 5 and polarity >= 10
filtered_attraction_reviews_df = attraction_reviews_df[
    (attraction_reviews_df['rating'] >= 5) & (attraction_reviews_df['polarity'] >= 10)
]
# Keep only attractions that have both review fields populated
filtered_attractions_df = attractions_df[
    attractions_df['reviews'].notna() & attractions_df['numReviews'].notna()
]

# Persist the *filtered* reviews. (Previously the unfiltered frame was written
# here, so the rating/polarity filter above had no effect on later steps.)
_write_records_json(filtered_attraction_reviews_df, 'filtered_attraction_reviews.json')

# Open and load the JSON file
print("Loading JSON file...")
with open("filtered_attraction_reviews.json", "r") as fp:
    reviews = json.load(fp)  # list of review dicts
print("Loaded JSON file successfully.")

# Resolve each review's "details" URL to the id of the place it belongs to
for index, item in enumerate(reviews):
    print(f"Processing review {index + 1} of {len(reviews)}...")

    # Make sure every record carries the key the merge below joins on,
    # even when there is no details link to follow.
    item.setdefault('place_id', None)

    extracted_link = item.get("details")
    if not extracted_link:
        continue

    try:
        print(f"Fetching details from: {extracted_link}")
        # The context manager closes the HTTP response even if parsing fails.
        with urlopen(extracted_link, timeout=DETAILS_TIMEOUT_SECONDS) as details_response:
            review_details = json.loads(details_response.read())

        place_id = review_details['place']['id']
        item['place_id'] = place_id
        print(f"Place ID for review {index + 1}: {place_id}")
    except Exception as e:
        # Best effort: one bad link must not abort the remaining reviews.
        print(f"Failed to fetch details for {extracted_link}: {e}")
        item['place_id'] = None

# Write the updated data to a new JSON file
print("Writing updated data to the new JSON file (test_file2.json)...")
with open("test_file2.json", "w") as fp:
    json.dump(reviews, fp, indent=4)
print("Updated data written successfully to test_file2.json.")

attraction_reviews_df = pd.read_json("test_file2.json")
filtered_attractions_df = filtered_attractions_df[filtered_attractions_df['location'] == "London"]

# Join attractions to their reviews; 'inner' keeps only rows matched on both sides.
if attraction_reviews_df.empty:
    # No review survived the filter, so there is no 'place_id' column to join on.
    merged_df = pd.DataFrame()
else:
    merged_df = pd.merge(filtered_attractions_df,
                         attraction_reviews_df,
                         left_on='id',
                         right_on='place_id',
                         how='inner')

# A bare len(...) in the middle of a cell displays nothing, so print it.
print(f"Merged rows: {len(merged_df)}")

# Save the merged result as pretty-printed JSON
_write_records_json(merged_df, 'merged_attractions_pretty.json')

Data has been saved to attractions.json
Data has been saved to attraction_reviews.json
Data has been saved to poi.json
Data has been saved to poi_reviews.json


In [1]:
# --------------------------- Sending data to MongoDB ---------------------------

from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi
import json
import os

# Load the attraction documents previously saved to disk
with open('attractions.json', 'r') as file:
    attractions = json.load(file)


# Connection string: taken from the MONGODB_URI environment variable when set,
# so credentials do not have to be stored in the notebook.
# SECURITY(review): the fallback literal embeds the username/password in the
# notebook and sets tlsAllowInvalidCertificates=true, which turns off TLS
# certificate validation. Prefer supplying MONGODB_URI without that option.
uri = os.environ.get(
    "MONGODB_URI",
    "mongodb+srv://////////:///////@/////.////.mongodb.net/test?retryWrites=true&w=majority&tlsAllowInvalidCertificates=true",
)


# Create a new client and connect to the server
client = MongoClient(uri, server_api=ServerApi('1'))


db = client['tourpedia_data']  # Create/use a database called 'tourpedia_data'
collection_attractions = db['attractions']  # Create/use a collection called 'attractions'


if attractions:
    # Replace the collection contents: delete every existing document, then
    # insert the freshly loaded ones.
    collection_attractions.delete_many({})
    collection_attractions.insert_many(attractions)
    print("Data successfully inserted into MongoDB!")
else:
    # insert_many rejects an empty list; skipping here also avoids wiping the
    # existing collection when there is nothing to replace it with.
    print("attractions.json contains no documents; MongoDB collection left unchanged.")


Data successfully inserted into MongoDB!
