# 🌤️ Fetching Daily Temperature from Open-Meteo API

In [None]:
import openmeteo_requests

import pandas as pd
import requests_cache
from retry_requests import retry

# Set up the Open-Meteo API client on top of a cached session that is wrapped
# with retry-on-error. expire_after = -1 asks requests-cache to never expire
# cached responses.
cache_session = requests_cache.CachedSession('.cache', expire_after = -1)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://archive-api.open-meteo.com/v1/archive"
params = {
	"latitude": 40.7143,
	"longitude": -74.006,
	"start_date": "2023-01-01",
	"end_date": "2023-06-30",
	"daily": "temperature_2m_mean",
	"timezone": "America/New_York",
}
responses = openmeteo.weather_api(url, params=params)


def _as_text(value):
	"""Return ``value`` as ``str``, decoding it as UTF-8 first when it is ``bytes``.

	The timezone accessors of the response hand back ``bytes``; formatting
	them directly prints their repr (``b'...'``) instead of the text.
	"""
	return value.decode("utf-8") if isinstance(value, bytes) else str(value)


# Process first location. Add a for-loop for multiple locations or weather models
response = responses[0]
print(f"Coordinates: {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation: {response.Elevation()} m asl")
print(f"Timezone: {_as_text(response.Timezone())} ({_as_text(response.TimezoneAbbreviation())})")
print(f"Timezone difference to GMT+0: {response.UtcOffsetSeconds()}s")

# Process daily data. The order of variables needs to be the same as requested.
daily = response.Daily()
daily_temperature_2m_mean = daily.Variables(0).ValuesAsNumpy()

# Rebuild the time axis from the response metadata: Time()/TimeEnd() are read as
# Unix timestamps in seconds and Interval() as the step in seconds. The range is
# left-inclusive, so the end bound itself is not part of the index. The
# resulting timestamps are timezone-aware UTC values.
daily_data = {"date": pd.date_range(
	start = pd.to_datetime(daily.Time(), unit = "s", utc = True),
	end =  pd.to_datetime(daily.TimeEnd(), unit = "s", utc = True),
	freq = pd.Timedelta(seconds = daily.Interval()),
	inclusive = "left"
)}

daily_data["temperature_2m_mean"] = daily_temperature_2m_mean

daily_dataframe = pd.DataFrame(data = daily_data)
print("\nDaily data\n", daily_dataframe)

Coordinates: 40.738136291503906°N -74.04254150390625°E
Elevation: 51.0 m asl
Timezone: b'America/New_York'b'GMT-5'
Timezone difference to GMT+0: -18000s

Daily data
                          date  temperature_2m_mean
0   2023-01-01 05:00:00+00:00             8.557918
1   2023-01-02 05:00:00+00:00             7.753750
2   2023-01-03 05:00:00+00:00             9.464166
3   2023-01-04 05:00:00+00:00            12.853749
4   2023-01-05 05:00:00+00:00             9.685000
..                        ...                  ...
176 2023-06-26 05:00:00+00:00            21.559999
177 2023-06-27 05:00:00+00:00            21.530830
178 2023-06-28 05:00:00+00:00            22.607912
179 2023-06-29 05:00:00+00:00            22.126665
180 2023-06-30 05:00:00+00:00            22.812080

[181 rows x 2 columns]


### Preparing Weather DataFrame

Here we create a copy of the daily temperature DataFrame (`daily_dataframe`) to use as our **weather dataset**.  

We also convert the `date` column to a **date-only format** (dropping the time-of-day part), so that it can later be matched against the daily sales dates.


In [7]:
# Derive the weather dataset as a new frame (daily_dataframe itself is left
# untouched) in which every timestamp is reduced to its calendar date.
# NOTE(review): the date is read off the UTC timestamp; confirm that this is
# the intended local day for the requested timezone.
weather_df = daily_dataframe.assign(date = daily_dataframe["date"].dt.date)
weather_df

Unnamed: 0,date,temperature_2m_mean
0,2023-01-01,8.557918
1,2023-01-02,7.753750
2,2023-01-03,9.464166
3,2023-01-04,12.853749
4,2023-01-05,9.685000
...,...,...
176,2023-06-26,21.559999
177,2023-06-27,21.530830
178,2023-06-28,22.607912
179,2023-06-29,22.126665


#### Loading Coffee Sales Dataset

In [8]:
import pandas as pd

!pip install openpyxl

coffe_data = pd.read_excel("C:\\Users\\zahra_gmdmxxo\\OneDrive\\Desktop\\Datacube\\3.PYTHON\\final_project\\coffee_sales_features.xlsx"
)



#### Aggregate Coffee Sales by Day

Here we create a **daily summary** of the coffee sales dataset.  

We group the transactions by `transaction_date` and calculate key daily metrics:

- **total_transactions** → total number of transactions per day (`count`)  
- **total_items_sold** → sum of all items sold per day (`sum`)  
- **total_revenue** → total revenue per day (`sum`)  
- **avg_unit_price** → mean `unit_price` across the day's transactions (`mean`)

In [9]:
# Output column -> (source column, aggregation) for the per-day summary.
daily_metrics = {
    "total_transactions": ("transaction_id", "count"),
    "total_items_sold": ("transaction_qty", "sum"),
    "total_revenue": ("total_sales", "sum"),
    "avg_unit_price": ("unit_price", "mean"),
}

# One row per transaction_date; the group key is exposed as a regular
# column called "date".
sales_by_day = coffe_data.groupby("transaction_date").agg(**daily_metrics)
daily_sales = sales_by_day.rename_axis("date").reset_index()

In [None]:
# Preview the first rows of the per-day sales summary.
daily_sales.head()

Unnamed: 0,date,total_transactions,total_items_sold,total_revenue,avg_unit_price
0,2023-01-01,550,802,2508.2,3.148727
1,2023-01-02,566,790,2403.35,3.081184
2,2023-01-03,582,823,2565.0,3.138488
3,2023-01-04,497,726,2220.1,3.085312
4,2023-01-05,547,778,2418.85,3.135923


In [12]:
# Inspect the column dtypes of the sales summary (in particular the `date` key).
daily_sales.dtypes

date                  datetime64[ns]
total_transactions             int64
total_items_sold               int64
total_revenue                float64
avg_unit_price               float64
dtype: object

In [13]:
# Preview the first rows of the weather dataset.
weather_df.head()

Unnamed: 0,date,temperature_2m_mean
0,2023-01-01,8.557918
1,2023-01-02,7.75375
2,2023-01-03,9.464166
3,2023-01-04,12.853749
4,2023-01-05,9.685


In [14]:
# Inspect the column dtypes of the weather dataset (in particular the `date` key).
weather_df.dtypes

date                    object
temperature_2m_mean    float32
dtype: object

### Merge daily_sales and weather_df

In [15]:
# Bring both join keys to pandas datetimes so that they compare equal in the
# merge (pd.to_datetime leaves an already-datetime column unchanged).
weather_df["date"] = pd.to_datetime(weather_df["date"])
daily_sales["date"] = pd.to_datetime(daily_sales["date"])

# Left join: keep every sales day and attach the temperature for that date.
# validate="one_to_one" makes pandas raise a MergeError if a date occurs more
# than once on either side, instead of silently duplicating sales rows.
merged_df = pd.merge(
    daily_sales,
    weather_df,
    on="date",
    how="left",
    validate="one_to_one",
)

In [16]:
# Preview the first rows of the merged sales + weather frame.
merged_df.head()

Unnamed: 0,date,total_transactions,total_items_sold,total_revenue,avg_unit_price,temperature_2m_mean
0,2023-01-01,550,802,2508.2,3.148727,8.557918
1,2023-01-02,566,790,2403.35,3.081184,7.75375
2,2023-01-03,582,823,2565.0,3.138488,9.464166
3,2023-01-04,497,726,2220.1,3.085312,12.853749
4,2023-01-05,547,778,2418.85,3.135923,9.685


In [17]:
# Count missing values per column; a non-zero count for temperature_2m_mean
# would indicate sales dates without a matching weather row.
merged_df.isna().sum()

date                   0
total_transactions     0
total_items_sold       0
total_revenue          0
avg_unit_price         0
temperature_2m_mean    0
dtype: int64

In [None]:
# Optional: uncomment the next line to export the merged dataset to an Excel file.
# merged_df.to_excel("daily_sales_with_weather.xlsx", index=False)