In [5]:
import json
import requests
import os
import time
import datetime
import requests
import pandas as pd
import pytz
from dotenv import load_dotenv

In [6]:
# Pull variables from a local .env file into the process environment first,
# so the lookup below can see them.
load_dotenv()

# OpenWeatherMap credential; None when the variable is not set.
API_KEY = os.environ.get("API_KEY")

# Start of the history window as Unix seconds (2020-11-25 01:00:00 UTC).
TIME_T0 = 1_606_266_000

In [7]:
def get_current_time():
    """Print the current time in local time and in UTC, and return it as Unix time.

    One timezone-aware instant is captured and rendered in both zones, so the
    two printed lines and the returned value always describe the same moment.

    Returns:
        int: Seconds since the Unix epoch, truncated to a whole second.
    """
    # Sample the clock once; two separate now() calls can straddle a second.
    current_time_utc = datetime.datetime.now(datetime.timezone.utc)

    # astimezone() with no argument converts to the system local zone and keeps
    # tzinfo attached, so %Z prints a zone name instead of an empty string.
    current_time = current_time_utc.astimezone()

    unix_time = int(current_time_utc.timestamp())
    print("Current time in local:", current_time.strftime("%Y-%m-%d %H:%M:%S %Z"))
    print("Current time in UTC:", current_time_utc.strftime("%Y-%m-%d %H:%M:%S %Z"))
    print("Unix time:", unix_time)
    return unix_time

In [8]:
def utcunix_to_local(timepoint):
    """Convert a Unix timestamp into a datetime in the machine's local time.

    Args:
        timepoint: Seconds since the Unix epoch.

    Returns:
        datetime.datetime: The result of ``datetime.fromtimestamp`` called
        without a timezone, i.e. a naive value expressed in local time.
    """
    return datetime.datetime.fromtimestamp(timepoint)

In [9]:
def get_weather(api_key, city="London,uk", timeout=30):
    """Fetch the current-weather payload for a city from OpenWeatherMap.

    Args:
        api_key: API key, sent as the ``APPID`` query parameter.
        city: Value of the ``q`` query parameter. Defaults to ``"London,uk"``,
            the location this function was previously hard-wired to.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The decoded JSON body of the response. The HTTP status is not checked,
        so an error payload from the service is returned as-is.
    """
    url = "https://api.openweathermap.org/data/2.5/weather"
    # Let requests build and escape the query string instead of formatting it by hand.
    query = {"q": city, "APPID": api_key}
    return requests.get(url, params=query, timeout=timeout).json()

In [10]:
def format_response(location, historical):
    """Flatten one air-pollution history entry into a single record.

    Args:
        location: Mapping with ``'lon'`` and ``'lat'`` entries.
        historical: Mapping with ``'main'`` (holding ``'aqi'``),
            ``'components'`` (the pollutant readings) and ``'dt'``.

    Returns:
        dict: ``lon``, ``lat`` and ``aqi`` as strings, each pollutant reading
        as a float, and ``time`` as ``utcunix_to_local(historical['dt'])``.
    """
    readings = historical['components']

    record = {
        'lon': str(location['lon']),
        'lat': str(location['lat']),
        'aqi': str(historical['main']['aqi']),
    }
    # Pollutants are copied in a fixed order so the resulting columns are stable.
    for pollutant in ('co', 'no', 'no2', 'o3', 'so2', 'pm2_5', 'pm10', 'nh3'):
        record[pollutant] = float(readings[pollutant])
    record['time'] = utcunix_to_local(historical['dt'])
    return record

In [11]:
def get_historical(lat, lon, start, end, api_key, timeout=30):
    """Download air-pollution history for one coordinate as a DataFrame.

    Args:
        lat: Latitude sent as the ``lat`` query parameter.
        lon: Longitude sent as the ``lon`` query parameter.
        start: Start of the window, sent as ``start``.
        end: End of the window, sent as ``end``.
        api_key: API key, sent as ``appid``.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        pandas.DataFrame with one row per entry of the response's ``'list'``,
        each built by ``format_response``; or ``None`` when the response has
        no ``'coord'`` or no ``'list'`` key.
    """
    # HTTPS keeps the API key in the query string from travelling in cleartext.
    url = "https://api.openweathermap.org/data/2.5/air_pollution/history"
    query = {"lat": lat, "lon": lon, "start": start, "end": end, "appid": api_key}
    response = requests.get(url, params=query, timeout=timeout).json()

    # Without both keys there is nothing to tabulate for this location.
    if 'coord' not in response or 'list' not in response:
        return None

    location = response['coord']
    records = [format_response(location, entry) for entry in response['list']]
    return pd.DataFrame(records)

In [25]:
def process_data(city_list, TIME_T0, CURRENT_TIME, API_KEY):
    """Collect air-pollution history for every city into one DataFrame.

    Args:
        city_list: DataFrame with ``'Latitude'`` and ``'Longitude'`` columns,
            one row per city.
        TIME_T0: Start of the window, forwarded to ``get_historical`` as ``start``.
        CURRENT_TIME: End of the window, forwarded as ``end``.
        API_KEY: API key, forwarded as ``api_key``.

    Returns:
        pandas.DataFrame: The per-city frames stacked with a fresh integer
        index. When no city yields any rows, an empty frame carrying the
        expected column names is returned.
    """
    columns = ['lon', 'lat', 'aqi', 'co', 'no', 'no2', 'o3', 'so2', 'pm2_5', 'pm10', 'nh3', 'time']

    # Gather the pieces and concatenate once: re-concatenating inside the loop
    # copies every earlier row on each iteration.
    frames = []
    for _, row in city_list.iterrows():
        data = get_historical(lat=row['Latitude'], lon=row['Longitude'], start=TIME_T0, end=CURRENT_TIME, api_key=API_KEY)
        # Skip failed lookups (None) and empty results so they cannot affect
        # the columns or dtypes of the combined frame.
        if data is not None and not data.empty:
            frames.append(data)

    if not frames:
        return pd.DataFrame(columns=columns)
    return pd.concat(frames, ignore_index=True)

In [2]:
# Load the city table; process_data reads its 'Latitude' and 'Longitude' columns.
city_list = pd.read_csv('germany_city.csv')

In [3]:
# Display the loaded city table.
city_list

Unnamed: 0,City,State,Latitude,Longitude
0,Berlin,Berlin,52.517037,13.38886
1,Hamburg,Hamburg,53.550341,10.000654
2,Munich,Bavaria,48.137108,11.575382
3,Cologne,North Rhine-Westphalia,50.938361,6.959974
4,Frankfurt,Hesse,50.110644,8.682092
5,Stuttgart,Baden-Württemberg,48.778449,9.180013
6,Dusseldorf,North Rhine-Westphalia,51.225402,6.776314
7,Leipzig,Saxony,51.340632,12.374733
8,Dortmund,North Rhine-Westphalia,51.514227,7.465279
9,Essen,North Rhine-Westphalia,51.458224,7.015817


In [4]:
# Summarise the column dtypes and non-null counts of the city table.
city_list.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 60 entries, 0 to 59
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   City       60 non-null     object 
 1   State      60 non-null     object 
 2   Latitude   60 non-null     float64
 3   Longitude  60 non-null     float64
dtypes: float64(2), object(2)
memory usage: 2.0+ KB


In [13]:
# Take the first row of the city table (returned as a Series) as a single-city sample.
df1 = city_list.iloc[0]

In [14]:
# Display the selected row.
df1

City            Berlin
State           Berlin
Latitude     52.517037
Longitude     13.38886
Name: 0, dtype: object

In [18]:
# Wrap the single-row Series in a one-row DataFrame, because process_data
# walks its input with iterrows().
new_df = pd.DataFrame([df1])

In [24]:
# Display the one-row sample frame.
new_df

Unnamed: 0,City,State,Latitude,Longitude
0,Berlin,Berlin,52.517037,13.38886


In [16]:
# Capture "now" as Unix seconds; passed below as the end of the history window.
CURRENT_TIME = get_current_time()

Current time in local: 2024-03-31 17:50:05 
Current time in UTC: 2024-03-31 15:50:05 UTC
Unix time: 1711900205


In [26]:
# Fetch the history for the one-city sample, from TIME_T0 up to CURRENT_TIME.
df = process_data(new_df, TIME_T0, CURRENT_TIME, API_KEY)

In [27]:
# Check the type of the object returned by process_data.
type(df)

pandas.core.frame.DataFrame

In [28]:
# Display the combined history frame.
df

Unnamed: 0,lon,lat,aqi,co,no,no2,o3,so2,pm2_5,pm10,nh3,time
0,13.3889,52.517,2,350.48,0.59,33.93,10.64,4.59,15.35,17.04,0.44,2020-11-25 02:00:00
1,13.3889,52.517,2,333.79,0.24,28.45,16.63,4.53,14.85,16.36,0.42,2020-11-25 03:00:00
2,13.3889,52.517,2,333.79,0.17,27.42,19.49,4.95,15.10,16.48,0.43,2020-11-25 04:00:00
3,13.3889,52.517,2,333.79,0.14,27.76,21.64,6.91,15.34,16.66,0.73,2020-11-25 05:00:00
4,13.3889,52.517,2,340.46,0.16,29.13,22.53,6.08,15.48,16.82,0.49,2020-11-25 06:00:00
...,...,...,...,...,...,...,...,...,...,...,...,...
29002,13.3889,52.517,2,270.37,1.87,6.34,49.35,2.71,17.31,25.24,4.94,2024-03-31 13:00:00
29003,13.3889,52.517,2,267.03,1.08,4.63,69.38,2.41,22.00,28.89,6.02,2024-03-31 14:00:00
29004,13.3889,52.517,2,263.69,0.52,3.51,85.83,1.89,23.18,28.18,6.40,2024-03-31 15:00:00
29005,13.3889,52.517,2,260.35,0.25,3.47,94.41,1.77,17.44,20.89,6.27,2024-03-31 16:00:00


In [30]:
# Check that a key was loaded without echoing the secret into the saved output.
# The key this cell previously displayed should be treated as compromised and
# rotated.
API_KEY is not None

True