In [None]:
import os
import time
from copy import deepcopy
from datetime import datetime, timedelta

import pandas as pd
import requests
from pytz import timezone

In [None]:
# Pollutant codes; each one is sent as the `parameter` query argument of the request.
pollutants = [
    'PM10',
    'PM25',
    'O3',
    'CO',
    'NO2',
    'SO2',
]

# Station identifiers; each one is sent as the `station` query argument of the request.
stations = [
    'Karpos',
    'Centar',
    'GaziBaba',
    'Lisice',
    'Rektorat',
    'Miladinovci',
]

In [None]:
# Overall time span to download (naive datetimes, midnight at both ends).
date_start = datetime(year=2000, month=1, day=1)
date_end = datetime(year=2020, month=4, day=1)

# Each request covers one window of this length ...
period = timedelta(days=150)
# ... shortened by this offset, so a window ends one second before the next one starts.
offset = timedelta(seconds=1)

In [None]:
# Download every (station, pollutant) series window by window and write one
# "Timestamp,Value" text file per series into `output_dir`.
#
# Failure handling:
#   * network / HTTP errors are retried on the SAME window, with a growing
#     (capped) back-off, at most `max_attempts` times before the window is skipped;
#   * a response that cannot be parsed or lacks the expected fields is treated
#     as "no usable data for this window" and skipped without retrying.

request_delay = 5        # polite pause (seconds) before every request
initial_sleep_time = 5   # first back-off (seconds) after a failed request
max_sleep_time = 60      # upper bound for the back-off
max_attempts = 5         # tries per window before it is given up
request_timeout = 60     # seconds before a hanging request is aborted
output_dir = 'data/raw-sensor'

sleep_time = initial_sleep_time  # current back-off; grows on failures, resets on success

# Create the output directory up front so a missing folder is not discovered
# only after a whole series has already been downloaded.
os.makedirs(output_dir, exist_ok=True)

for station in stations:
    for pollutant in pollutants:
        results = []
        # datetime objects are immutable and `+=` rebinds the name, so no copy is needed.
        curr_start = date_start
        failed_attempts = 0

        while curr_start < date_end:
            curr_end = curr_start + period - offset
            print(curr_start.strftime("%d/%m/%Y, %H:%M:%S"), '-', curr_end.strftime("%d/%m/%Y, %H:%M:%S"),
                  station, pollutant)

            URL = (
                f'http://air.moepp.gov.mk/graphs/site/pages/MakeGraph.php?station={station}'
                f'&parameter={pollutant}'
                f'&beginDate={curr_start.strftime("%Y-%m-%d")}'
                f'&beginTime={curr_start.strftime("%H:%M")}'
                f'&endDate={curr_end.strftime("%Y-%m-%d")}'
                f'&endTime={curr_end.strftime("%H:%M")}'
                f'&i=1585697894082&lang=mk'
            )

            time.sleep(request_delay)

            try:
                r = requests.get(url=URL, timeout=request_timeout)
                r.raise_for_status()
            except requests.RequestException as e:
                print(e)
                failed_attempts += 1
                if failed_attempts < max_attempts:
                    # Transient failure: back off and retry the same window
                    # (curr_start is deliberately NOT advanced).
                    time.sleep(sleep_time)
                    sleep_time = min(sleep_time + 5, max_sleep_time)
                    continue
                print(f'Giving up on {station} {pollutant} window starting {curr_start} '
                      f'after {failed_attempts} failed attempts')
                failed_attempts = 0
                curr_start += period
                continue

            # The request succeeded: reset the retry state and move to the next window.
            failed_attempts = 0
            sleep_time = initial_sleep_time
            curr_start += period

            try:
                data = r.json()

                # sanity check to make sure the json has all the expected fields
                if 'measurements' not in data or len(data['measurements']) == 0 \
                        or 'data' not in data['measurements'][0]:
                    continue

                measurements = data['measurements'][0]['data']
                timestamps = data['times']
            except (ValueError, KeyError, IndexError, TypeError) as e:
                # Body is not JSON or does not have the expected structure; retrying
                # the same window would not help, so report it and move on.
                print(e)
                continue

            results.extend(zip(timestamps, measurements))

        # One file per series; a series without any data still gets a header-only file.
        with open(f'{output_dir}/{station}_{pollutant}', 'w') as f:
            f.write('Timestamp,Value\n')
            for timestamp, value in results:
                f.write(f'{timestamp},{value}\n')
    