Join the data from Part 1 with the data from Part 2 to create a new dataframe.

In [None]:
# Assuming you have 'stations', 'foursquare_df', and 'yelp_df' DataFrames
import pandas as pd

# Left-join each API's POI frame onto the station list using coordinate pairs
# as the join key, so every station row is kept even when nothing matches.
# NOTE(review): the keys compare station coordinates with POI coordinates for
# exact equality; confirm the POI frames really carry the station's coordinates
# in these columns, otherwise the POI columns will come back empty.
station_keys = ['latitude', 'longitude']
foursquare_keys = ['venue.location.lat', 'venue.location.lng']
yelp_keys = ['coordinates.latitude', 'coordinates.longitude']

foursquare_merged = stations.merge(
    foursquare_df,
    how='left',
    left_on=station_keys,
    right_on=foursquare_keys,
)

yelp_merged = stations.merge(
    yelp_df,
    how='left',
    left_on=station_keys,
    right_on=yelp_keys,
)

# Stack both results; the outer index level records which API a row came from.
merged_data = pd.concat(
    [foursquare_merged, yelp_merged],
    keys=['Foursquare', 'Yelp'],
)

Provide a visualization that you used as part of your EDA process. Explain the initial pattern or relationship you discovered through this visualization.

In [None]:
import matplotlib.pyplot as plt

# Bar chart of free bikes per merged row, drawn through the explicit Axes API.
# The x positions are the inner index level of `merged_data`.
fig, ax = plt.subplots(figsize=(10, 6))

row_positions = merged_data.index.get_level_values(1)
bike_counts = merged_data['free_bikes']
ax.bar(row_positions, bike_counts)

# Tilt the tick labels so neighbouring labels do not overlap.
ax.tick_params(axis='x', labelrotation=45)
ax.set_xlabel('Station')
ax.set_ylabel('Number of Available Bikes')
ax.set_title('Available Bikes at Each Station')

plt.show()

# Database

Put all your results in an SQLite3 database (remember, SQLite stores its databases as files on your local machine - make sure to create your database in your project's data/ directory!)

In [None]:
import sqlite3

# Persist every row of `merged_data` into the `pois` table of bike_pois.db.
#
# NOTE(review): the instructions ask for the database to live in the project's
# data/ directory; this relative path resolves against the notebook's working
# directory - confirm that is data/ or adjust the path.
# NOTE(review): re-running this cell appends the same rows again because the
# table is only created, never cleared.
# NOTE(review): only the Foursquare `venue.*` columns are read, so rows from
# the Yelp half of `merged_data` are stored with NULL name/category/rating.


def _sql_value(value):
    """Return `value` as a built-in Python object sqlite3 can bind.

    Missing scalars (None, NaN, pd.NA) become None so they are stored as NULL;
    NumPy scalars are unwrapped with `.item()`.
    """
    if value is None or (pd.api.types.is_scalar(value) and pd.isna(value)):
        return None
    return value.item() if hasattr(value, 'item') else value


def _first_category_name(categories):
    """Return the 'name' of the first category entry, or None if unavailable.

    Rows without a Foursquare match hold NaN here, which cannot be subscripted.
    """
    try:
        return categories[0]['name']
    except (TypeError, IndexError, KeyError):
        return None


def _poi_record(index, row):
    """Build the parameter tuple for one INSERT from an `iterrows()` pair."""
    api_source, station_id = index
    return (
        _sql_value(station_id),
        api_source,
        _sql_value(row.get('venue.name')),
        _first_category_name(row.get('venue.categories')),
        # The merge only adds the `_x` suffix when both frames share a column
        # name, so accept either spelling of the station coordinates.
        _sql_value(row.get('latitude_x', row.get('latitude'))),
        _sql_value(row.get('longitude_x', row.get('longitude'))),
        _sql_value(row['free_bikes']),
        _sql_value(row.get('venue.rating')),
        _sql_value(row.get('venue.ratingSignals')),
    )


# Create a connection to the database (or connect to an existing one)
conn = sqlite3.connect('bike_pois.db')
try:
    # Create a cursor object
    cursor = conn.cursor()

    # Create a table for POI data
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS pois (
            id INTEGER PRIMARY KEY,
            station_id INTEGER,
            api_source TEXT,
            poi_name TEXT,
            category TEXT,
            latitude REAL,
            longitude REAL,
            num_bikes INTEGER,
            rating REAL,
            reviews_count INTEGER
        )
    ''')

    # Insert one record per merged row in a single batched statement
    cursor.executemany('''
        INSERT INTO pois (station_id, api_source, poi_name, category, latitude, longitude, num_bikes, rating, reviews_count)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
    ''', (_poi_record(index, row) for index, row in merged_data.iterrows()))

    # Commit only after every insert succeeded
    conn.commit()
finally:
    # Always release the database file, even if an insert failed
    conn.close()

Look at the data before and after the join to validate your data.

In [None]:
# Validate the joined data: duplicates, missing values, row counts and dtypes.

# `api_source` only exists as the outer index level created by
# pd.concat(..., keys=[...]); expose it as a column so it can be used in `subset`.
if 'api_source' in merged_data.columns:
    validation_frame = merged_data
else:
    validation_frame = merged_data.assign(
        api_source=merged_data.index.get_level_values(0)
    )

# The merge only suffixes the station coordinates with `_x` when both frames
# share the column name, so accept either spelling.
lat_col = 'latitude' if 'latitude' in validation_frame.columns else 'latitude_x'
lon_col = 'longitude' if 'longitude' in validation_frame.columns else 'longitude_x'

# Check for duplicate entries
duplicate_entries = validation_frame[
    validation_frame.duplicated(subset=[lat_col, lon_col, 'api_source'])
]
print("Duplicate Entries:")
print(duplicate_entries)

# Check for missing values
missing_values = merged_data[merged_data.isnull().any(axis=1)]
print("Missing Values:")
print(missing_values)
# Per-column null counts show which fields are responsible for the rows above.
print(merged_data.isnull().sum())

# Compare counts
print("Counts:")
print("Original Foursquare Count:", len(foursquare_df))
print("Original Yelp Count:", len(yelp_df))
print("Merged Data Count:", len(merged_data))

# Verify data types
print("Data Types:")
print(merged_data.dtypes)