In [None]:
from datetime import datetime
from dateutil.relativedelta import relativedelta
#import json
import os

import pandas as pd
pd.set_option("display.max_columns", 30)
import requests

In [None]:
#
def formated_day_back(days: int):
    """Return the date that lies `days` days before the current local time.

    Parameters:
        days (int): Number of days to step back from now.

    Returns:
        str: The resulting date formatted as "YYYY-MM-DD".
    """
    target_day = datetime.now() - relativedelta(days=days)
    # A format spec on a datetime is delegated to strftime.
    return f"{target_day:%Y-%m-%d}"

In [None]:
"""
1. get the data from s3
2. weather data transformation
3. taxi trips transformation
4. update payment_type_master
5. update company_master
6. update taxi trips with company and payment_type ids (from master tables)
7. upload the newest payment_type_master and company_master
8. upload weather data to s3
9. upload taxi data to s3
"""

### Taxi trips transformation

In [None]:
# Fetch the trips that started on one calendar day: the day 60 days before now.
formated_datetime = formated_day_back(60)

url = "https://data.cityofchicago.org/resource/ajtu-isnz.json"
# Pass the query as a mapping so requests URL-encodes the spaces and quotes
# in the filter expression instead of relying on a hand-assembled query string.
params = {
    "$where": (
        f"trip_start_timestamp>='{formated_datetime}T00:00:00' "
        f"AND trip_start_timestamp<='{formated_datetime}T23:59:59'"
    ),
    "$limit": 30000,
}
headers = {"X-App-Token": os.environ.get("CHICAGO_API_TOKEN")}

# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, headers=headers, params=params, timeout=60)
# Fail here on a 4xx/5xx response instead of building a DataFrame from an error payload.
response.raise_for_status()

data = response.json()

In [None]:
# Build a DataFrame from the parsed JSON payload of the API response.
taxi_trips = pd.DataFrame(data)

#### taxi_trips transformation function

In [None]:
def taxi_trips_transformations(taxi_trips: pd.DataFrame) -> pd.DataFrame:
    """Clean and reshape a DataFrame containing taxi trip data.

    The frame is modified in place and the same object is returned, so a
    caller that ignores the return value still sees the transformed data.
    Running the function again on an already transformed frame is a no-op.

    Steps:
        1. Drop the census tract and centroid location columns.
        2. Drop every row that contains a missing value.
        3. Rename the community area columns with an ``_id`` suffix.
        4. Parse the trip start/end timestamps into datetimes.
        5. Add ``datetime_for_weather``: the trip start floored to the hour.

    Parameters:
        taxi_trips (pd.DataFrame): A DataFrame containing taxi trip data.

    Returns:
        pd.DataFrame: The cleaned, transformed DataFrame (same object as the input).

    Raises:
        TypeError: If ``taxi_trips`` is not a pandas DataFrame.
        KeyError: If a trip timestamp column is missing.
    """

    if not isinstance(taxi_trips, pd.DataFrame):
        raise TypeError("taxi_trips is not a valid pandas Dataframe.")

    unused_columns = [
        "pickup_census_tract",
        "dropoff_census_tract",
        "pickup_centroid_location",
        "dropoff_centroid_location",
    ]
    # errors="ignore": a listed column may be absent (for example when this
    # function is re-run on a frame it has already transformed); that must
    # not abort the whole transformation with a KeyError.
    taxi_trips.drop(columns=unused_columns, errors="ignore", inplace=True)

    taxi_trips.dropna(inplace=True)

    taxi_trips.rename(
        columns={
            "pickup_community_area": "pickup_community_area_id",
            "dropoff_community_area": "dropoff_community_area_id",
        },
        inplace=True,
    )

    for timestamp_column in ("trip_start_timestamp", "trip_end_timestamp"):
        taxi_trips[timestamp_column] = pd.to_datetime(taxi_trips[timestamp_column])

    # Lowercase "h" is the supported hourly alias; uppercase "H" is deprecated
    # in recent pandas releases.
    taxi_trips["datetime_for_weather"] = taxi_trips["trip_start_timestamp"].dt.floor("h")

    return taxi_trips

In [None]:
# The function edits the frame in place and returns it; rebinding the name
# makes the data lineage explicit instead of relying on the side effect.
taxi_trips = taxi_trips_transformations(taxi_trips)
# Preview a few rows rather than rendering the whole frame.
taxi_trips.head()