<h2><font size="3" color="#008B8B"><strong>Principais objetivos: </strong></font></h2>
<ul>
    <li><font size="2" color="#1f77b4">Coletar dados de Radiação solar da Solcast API.</font></li>
    <li><font size="2" color="#1f77b4">Limpar os dados (remover duplicados, remover valores nulos).</font></li>
    <li><font size="2" color="#1f77b4">Reamostrar dados para frequência horária.</font></li>
    <li><font size="2" color="#1f77b4">Converter e formatar datas (timezone, formato pandas).</font></li>
    <li><font size="2" color="#1f77b4">Visualizar a distribuição dos dados.</font></li>
    <li><font size="2" color="#1f77b4">Salvar em arquivo csv.</font></li>
</ul>

In [1]:
import os
from pathlib import Path
from datetime import datetime, timedelta, timezone
import pytz
import httpx

import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import seaborn as sns

from dotenv import load_dotenv

load_dotenv()


True

In [2]:
# Constants:

BR_TZ = pytz.timezone("America/Sao_Paulo")  # zone used to localise and display timestamps
UTC_TZ = pytz.timezone("UTC")
FMT = "%Y-%m-%d %H:%M:%S"  # layout of START_DATE / END_DATE, parsed by the date helpers
# Target frequency for the hourly resample. Lowercase "h" is used because the
# uppercase "H" alias is deprecated since pandas 2.2 (it emits a FutureWarning).
FREQUENCY = "h"

# Wall-clock bounds of the historical window (interpreted in BR_TZ by the helpers).
START_DATE = "2023-06-01 00:00:00"
END_DATE = "2023-10-06 23:59:59"

# Credentials and site coordinates come from the environment (.env); each is None when unset.
API_KEY = os.getenv("API_KEY_SOLCAST")    # https://toolkit.solcast.com.au/account/api-key
LAT = os.getenv("LAT")                    # FGA latitude (location of the solar panels)
LON = os.getenv("LON")                    # FGA longitude (location of the solar panels)


In [3]:
# Helper functions:


def send_request(client, url, params=None, headers=None):
    """Issue a GET request and return the decoded JSON body.

    Args:
        client: object exposing ``get(url, params=..., headers=...)`` (an ``httpx.Client`` here).
        url: endpoint to call.
        params: optional query-string parameters.
        headers: optional request headers.

    Returns:
        Whatever ``response.json()`` yields.

    Raises:
        httpx.HTTPError: re-raised unchanged after the failing URL has been printed.
    """
    try:
        reply = client.get(url, headers=headers, params=params)
        # Turn 4xx/5xx answers into exceptions before trying to decode the body.
        reply.raise_for_status()
        payload = reply.json()
    except httpx.HTTPError as exc:
        print(f"HTTP Exception for {exc.request.url} - {exc}")
        raise
    return payload


def date_to_datetime(date):
    """Return ``date`` localised to ``BR_TZ``.

    Args:
        date: either a string laid out as ``FMT`` or an already-built datetime.
            NOTE(review): a non-string value is handed straight to
            ``BR_TZ.localize``; pytz expects it to be naive - confirm callers comply.

    Returns:
        The result of ``BR_TZ.localize`` applied to the parsed (or given) value.
    """
    if not isinstance(date, str):
        return BR_TZ.localize(date)

    naive = datetime.strptime(date, FMT)
    return BR_TZ.localize(naive)


def date_to_iso(date):
    """Return the ISO-8601 text of ``date`` after localising it to ``BR_TZ``.

    Args:
        date: either a string laid out as ``FMT`` or an already-built datetime.

    Returns:
        ``isoformat()`` of the value produced by ``BR_TZ.localize``.
    """
    naive = datetime.strptime(date, FMT) if isinstance(date, str) else date
    return BR_TZ.localize(naive).isoformat()


def iso_to_date(date):
    """Convert an ISO-8601 string (or a datetime) to a datetime expressed in ``BR_TZ``.

    Args:
        date: ISO-8601 text understood by ``datetime.fromisoformat`` or a datetime object.

    Returns:
        The same instant converted with ``astimezone(BR_TZ)``.
    """
    moment = datetime.fromisoformat(date) if isinstance(date, str) else date
    return moment.astimezone(BR_TZ)


In [4]:
# Carregar dados históricos de radiação solar (https://api.solcast.com.au/data/historic):
# Dados históricos de 01-06-2023 a 06-10-2023 (freq: 15min)


def query_parameters(start_date, end_date):
    """Build the query-string dict for one historic request window.

    Args:
        start_date: object with ``isoformat()`` marking the window start.
        end_date: object with ``isoformat()`` marking the window end.

    Returns:
        Dict with the site coordinates (``LAT``/``LON``), JSON output, UTC
        timestamps, the ISO window bounds and a 15-minute period.
    """
    site = {"latitude": LAT, "longitude": LON}
    output = {"format": "json", "time_zone": "utc"}
    window = {"start": start_date.isoformat(), "end": end_date.isoformat()}
    return {**site, **output, **window, "period": "PT15M"}


def _first_of_next_month(moment):
    """Return ``moment`` moved to day 1 of the following month (time of day and tzinfo kept).

    December rolls over to January of the next year instead of asking
    ``replace`` for the invalid month 13.
    """
    if moment.month == 12:
        return moment.replace(year=moment.year + 1, month=1, day=1)
    return moment.replace(month=moment.month + 1, day=1)


headers = {"Authorization": f"Bearer {API_KEY}"}
base_url = "https://api.solcast.com.au/data/historic/radiation_and_weather"

# The window opens one second before START_DATE - presumably so that the sample
# stamped exactly at START_DATE is part of the first answer (TODO confirm).
start_date = date_to_datetime(START_DATE) - timedelta(seconds=1)
end_date = date_to_datetime(END_DATE)
print("=" * 50)
print(f"Data inicial: {start_date}")
print(f"Data final  : {end_date}")

all_data = []

with httpx.Client(timeout=60) as client:
    # Walk the range in chunks that end at the start of the next month (or at end_date).
    while start_date <= end_date:
        end_request = _first_of_next_month(start_date)
        end_request = min(end_request, end_date)
        print("-" * 50)
        print(f"start_request : {start_date}")
        print(f"end_request: {end_request}")

        query_params = query_parameters(start_date, end_request)
        response = send_request(client, base_url, params=query_params, headers=headers)
        data = response["estimated_actuals"]
        if not data:
            # Fail with a readable message instead of an IndexError on data[-1].
            raise RuntimeError(f"No data returned for window {start_date} - {end_request}")
        all_data.extend(data)

        # The next chunk starts where this one stopped.
        last_date = data[-1]["period_end"]
        next_start = iso_to_date(last_date)
        if next_start <= start_date:
            # Without forward progress the loop would request the same window forever.
            raise RuntimeError(f"Request window did not advance past {start_date}")
        start_date = next_start

    else:
        print("-" * 100)
        print(f"Todos os dados foram coletados ({START_DATE} - {END_DATE}).")

print("-" * 100)
print("Examplo dos dados coletados:")
print(all_data[0])
print(all_data[-1])


Intervalo de datas:
START DATE: 2023-05-31 23:59:59-03:00
END DATE : 2023-10-06 23:59:59-03:00
start_date : 2023-05-31 23:59:59-03:00
end_request: 2023-06-01 23:59:59-03:00
start_date : 2023-06-02 00:00:00-03:00
end_request: 2023-07-01 00:00:00-03:00
start_date : 2023-07-01 00:00:00-03:00
end_request: 2023-08-01 00:00:00-03:00
start_date : 2023-08-01 00:00:00-03:00
end_request: 2023-09-01 00:00:00-03:00
start_date : 2023-09-01 00:00:00-03:00
end_request: 2023-10-01 00:00:00-03:00
start_date : 2023-10-01 00:00:00-03:00
end_request: 2023-10-06 23:59:59-03:00
Todos os dados foram coletados (2023-06-01 00:00:00 - 2023-10-06 23:59:59).
Examplo dos dados coletados:
{'air_temp': 18, 'dni': 0, 'ghi': 0, 'period_end': '2023-06-01T03:00:00.0000000Z', 'period': 'PT15M'}
{'air_temp': 20, 'dni': 0, 'ghi': 0, 'period_end': '2023-10-07T03:00:00.0000000Z', 'period': 'PT15M'}


In [5]:
def clean_data_item(item):
    """Reduce one historic API record to the columns kept in the dataset.

    Args:
        item: mapping with at least ``period_end``, ``air_temp``, ``dni`` and ``ghi``.

    Returns:
        Dict with ``date_time`` (``period_end`` passed through ``iso_to_date``)
        followed by ``air_temp``, ``dni`` and ``ghi`` copied as-is.
    """
    record = {"date_time": iso_to_date(item["period_end"])}
    for field in ("air_temp", "dni", "ghi"):
        record[field] = item[field]
    return record

# Flatten every collected record and load the result into a DataFrame.
data = list(map(clean_data_item, all_data))
df_raw = pd.DataFrame(data)

print(f"Shape: {df_raw.shape}")
df_raw.head()


Shape: (12289, 4)


Unnamed: 0,date_time,air_temp,dni,ghi
0,2023-06-01 00:00:00-03:00,18,0,0
1,2023-06-01 00:15:00-03:00,18,0,0
2,2023-06-01 00:30:00-03:00,18,0,0
3,2023-06-01 00:45:00-03:00,18,0,0
4,2023-06-01 01:00:00-03:00,18,0,0


In [6]:
# Format the dates, then set and sort the index

df_raw["date_time"] = pd.to_datetime(df_raw["date_time"], format="%Y-%m-%d %H:%M:%S")
df = df_raw.set_index("date_time")
# Drop the timezone from the index (tz_localize(None) keeps the local wall-clock values).
df.index = df.index.tz_localize(None)
df = df.sort_index()

print(f"Shape: {df.shape}")
print(f"Interval: {df.index.min()} - {df.index.max()}")
df.head()


Shape: (12289, 3)
Interval: 2023-06-01 00:00:00 - 2023-10-07 00:00:00


Unnamed: 0_level_0,air_temp,dni,ghi
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-06-01 00:00:00,18,0,0
2023-06-01 00:15:00,18,0,0
2023-06-01 00:30:00,18,0,0
2023-06-01 00:45:00,18,0,0
2023-06-01 01:00:00,18,0,0


In [7]:
# Look for repeated timestamps (every occurrence after the first counts as a duplicate)

dup_mask = df.index.duplicated(keep="first")
duplicated = df[dup_mask]
print(f"Duplicated: {duplicated.shape}")

# Keep the first occurrence and discard the repeated ones
df_base = df[~dup_mask]

print(f"New Shape: {df_base.shape}")
print(f"Interval: {df_base.index.min()} - {df_base.index.max()}")


Duplicated: (0, 3)
New Shape: (12289, 3)
Interval: 2023-06-01 00:00:00 - 2023-10-07 00:00:00


In [8]:
# Check that the df_base index is in increasing order and evenly spaced.
# Evenly spaced means the gap between consecutive timestamps is always the same
# (a single distinct difference; the leading NaT is ignored by nunique).

time_diffs = df_base.index.to_series().diff()
order = df_base.index.is_monotonic_increasing
equidistant = time_diffs.nunique() == 1

display(time_diffs.iloc[:3])
print(f"Ordem crescente: {order}")
print(f"Freq esquidistante: {equidistant}")


date_time
2023-06-01 00:00:00               NaT
2023-06-01 00:15:00   0 days 00:15:00
2023-06-01 00:30:00   0 days 00:15:00
Name: date_time, dtype: timedelta64[ns]

Ordem crescente: True
Freq esquidistante: True


In [9]:
# Resample to hourly frequency

# Pin an explicit 15-minute frequency on df_base (asfreq would add NaN rows for missing stamps)

df_base = df_base.asfreq("15min")

# NOTE(review): with pandas' default binning each hourly mean covers the stamps
# hh:00..hh:45; date_time is built from the API's "period_end", so confirm that
# this alignment is the intended one.
df_hourly = df_base.resample(FREQUENCY).mean()
df_hourly = df_hourly.asfreq(FREQUENCY)

print(f"Shape df_base: {df_base.shape}")
print(f"Frequência: {df_base.index.freq}")
display(df_base.head())

print(f"Shape df_hourly: {df_hourly.shape}")
print(f"Frequência: {df_hourly.index.freq}")
# Report the bounds of the hourly frame itself (this line previously printed df_base's bounds).
print(f"Interval: {df_hourly.index.min()} - {df_hourly.index.max()}")
df_hourly.head()


Shape df_base: (12289, 3)
Frequência: <15 * Minutes>


Unnamed: 0_level_0,air_temp,dni,ghi
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-06-01 00:00:00,18,0,0
2023-06-01 00:15:00,18,0,0
2023-06-01 00:30:00,18,0,0
2023-06-01 00:45:00,18,0,0
2023-06-01 01:00:00,18,0,0


Shape df_hourly: (3073, 3)
Frequência: <Hour>
Interval: 2023-06-01 00:00:00 - 2023-10-07 00:00:00


Unnamed: 0_level_0,air_temp,dni,ghi
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-06-01 00:00:00,18.0,0.0,0.0
2023-06-01 01:00:00,17.5,0.0,0.0
2023-06-01 02:00:00,17.0,0.0,0.0
2023-06-01 03:00:00,17.0,0.0,0.0
2023-06-01 04:00:00,16.0,0.0,0.0


In [10]:
# Check for missing values: list affected rows and count NaNs per column

hourly_missing = df_hourly.isna()
display(df_hourly[hourly_missing.any(axis=1)])
print(hourly_missing.sum())
print("-" * 50)

base_missing = df_base.isna()
display(df_base[base_missing.any(axis=1)])
base_missing.sum()


Unnamed: 0_level_0,air_temp,dni,ghi
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1


air_temp    0
dni         0
ghi         0
dtype: int64
--------------------------------------------------


Unnamed: 0_level_0,air_temp,dni,ghi
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1


air_temp    0
dni         0
ghi         0
dtype: int64

In [14]:
# Save the CSV files (range 01/06/2023 to 06/10/2023)
start_date = pd.to_datetime(START_DATE)
end_date = pd.to_datetime(END_DATE)

# Create the target folder first: to_csv fails when the directory does not exist.
output_dir = Path("../dataset")
output_dir.mkdir(parents=True, exist_ok=True)

# Label-based slicing is inclusive on both ends; rows outside the range are dropped.
df_hourly = df_hourly.loc[start_date:end_date]
df_hourly.to_csv(output_dir / "0.0_hourly_radiation_data.csv")

df_base = df_base.loc[start_date:end_date]
df_base.to_csv(output_dir / "0.0_quarterly_radiation_data.csv")


----
#### Exemplo: carregar dados da última semana

In [15]:
# Exemplo de como carregar dados das últimas 168 horas (7 dias)
# Dados de radiação solar (https://api.solcast.com.au/world_radiation/estimated_actuals)


def get_query_parameters(hours=168):
    """Build the query-string dict for the estimated-actuals request.

    Args:
        hours: size of the requested window in hours. Defaults to 168 (7 days),
            the value that used to be hard-coded.

    Returns:
        Dict with the site coordinates (``LAT``/``LON``), ``hours`` and JSON output.
    """
    return {
        "latitude": LAT,
        "longitude": LON,
        "hours": hours,
        "format": "json",
    }


base_url = "https://api.solcast.com.au/world_radiation/estimated_actuals"
headers = {"Authorization": f"Bearer {API_KEY}"}
query_params = get_query_parameters()

# A single request is enough here; the client is closed as soon as it returns.
with httpx.Client(timeout=60) as client:
    response = send_request(client, base_url, params=query_params, headers=headers)

data = response["estimated_actuals"]

print("=" * 80)
print("Examplo dos dados coletados:")
# Show the first and the last record of the answer.
for sample in (data[0], data[-1]):
    print(sample)


Examplo dos dados coletados:
{'ghi': 2, 'ebh': 0, 'dni': 0, 'dhi': 2, 'cloud_opacity': 65, 'period_end': '2023-11-04T21:30:00.0000000Z', 'period': 'PT30M'}
{'ghi': 1, 'ebh': 0, 'dni': 0, 'dhi': 1, 'cloud_opacity': 84, 'period_end': '2023-10-28T21:30:00.0000000Z', 'period': 'PT30M'}


In [16]:
def clean_data_item(item):
    """Reduce one estimated-actuals record to the columns kept for analysis.

    Args:
        item: mapping with ``period_end``, ``ghi``, ``ebh``, ``dni``, ``dhi``
            and ``cloud_opacity``.

    Returns:
        Dict with ``date_time`` (``period_end`` passed through ``iso_to_date``)
        followed by the five measurements copied as-is.
    """
    record = {"date_time": iso_to_date(item["period_end"])}
    for field in ("ghi", "ebh", "dni", "dhi", "cloud_opacity"):
        record[field] = item[field]
    return record


# Flatten the records, index them by timestamp and sort chronologically.
clean_data = list(map(clean_data_item, data))
df_raw = pd.DataFrame(clean_data)

df = df_raw.set_index("date_time").sort_index()

print(f"Shape: {df.shape}")
print(f"Interval: {df.index.min()} - {df.index.max()}")
display(df.head())
df.tail()


Shape: (337, 5)
Interval: 2023-10-28 18:30:00-03:00 - 2023-11-04 18:30:00-03:00


Unnamed: 0_level_0,ghi,ebh,dni,dhi,cloud_opacity
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-10-28 18:30:00-03:00,1,0,0,1,84
2023-10-28 19:00:00-03:00,0,0,0,0,91
2023-10-28 19:30:00-03:00,0,0,0,0,95
2023-10-28 20:00:00-03:00,0,0,0,0,94
2023-10-28 20:30:00-03:00,0,0,0,0,94


Unnamed: 0_level_0,ghi,ebh,dni,dhi,cloud_opacity
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-11-04 16:30:00-03:00,89,0,0,89,80
2023-11-04 17:00:00-03:00,61,0,0,61,81
2023-11-04 17:30:00-03:00,38,0,0,38,80
2023-11-04 18:00:00-03:00,16,0,0,16,78
2023-11-04 18:30:00-03:00,2,0,0,2,65


----
#### Exemplo: carregar dados de previsão das próximas 168 horas

In [17]:
# Exemplo de como carregar dados de previsão das próximas 168 horas (7 dias)
# Dados de radiação solar (https://api.solcast.com.au/world_radiation/forecasts)


def get_query_parameters(hours=168):
    """Build the query-string dict for the forecast request.

    Args:
        hours: size of the requested window in hours. Defaults to 168 (7 days),
            the value that used to be hard-coded.

    Returns:
        Dict with the site coordinates (``LAT``/``LON``), ``hours`` and JSON output.
    """
    return {
        "latitude": LAT,
        "longitude": LON,
        "hours": hours,
        "format": "json",
    }


base_url = "https://api.solcast.com.au/world_radiation/forecasts"
headers = {"Authorization": f"Bearer {API_KEY}"}
query_params = get_query_parameters()

# A single request is enough here; the client is closed as soon as it returns.
with httpx.Client(timeout=60) as client:
    response = send_request(client, base_url, params=query_params, headers=headers)

data = response["forecasts"]

print("Examplo dos dados coletados:")
# Show the first and the last record of the answer.
for sample in (data[0], data[-1]):
    print(sample)


Examplo dos dados coletados:
{'ghi': 2, 'ghi90': 2, 'ghi10': 1, 'ebh': 0, 'dni': 0, 'dni10': 0, 'dni90': 0, 'dhi': 2, 'air_temp': 24, 'zenith': 90, 'azimuth': 106, 'cloud_opacity': 65, 'period_end': '2023-11-04T21:30:00.0000000Z', 'period': 'PT30M'}
{'ghi': 6, 'ghi90': 9, 'ghi10': 2, 'ebh': 0, 'dni': 0, 'dni10': 0, 'dni90': 26, 'dhi': 6, 'air_temp': 29, 'zenith': 90, 'azimuth': 108, 'cloud_opacity': 36, 'period_end': '2023-11-11T21:30:00.0000000Z', 'period': 'PT30M'}


In [18]:
def clean_data_item(item):
    """Reduce one forecast record to the columns kept for analysis.

    Args:
        item: mapping with ``period_end`` plus every field listed in the body.

    Returns:
        Dict with ``date_time`` (``period_end`` passed through ``iso_to_date``)
        followed by the selected fields, copied as-is in the order listed.
    """
    kept_fields = (
        "air_temp",
        "zenith",
        "azimuth",
        "ghi",
        "ghi90",
        "ghi10",
        "ebh",
        "dni",
        "dni90",
        "dni10",
        "dhi",
        "cloud_opacity",
    )
    record = {"date_time": iso_to_date(item["period_end"])}
    record.update((field, item[field]) for field in kept_fields)
    return record


# Flatten the records, index them by timestamp and sort chronologically.
clean_data = list(map(clean_data_item, data))
df_raw = pd.DataFrame(clean_data)

df = df_raw.set_index("date_time").sort_index()

print(f"Shape: {df.shape}")
print(f"Interval: {df.index.min()} - {df.index.max()}")
display(df.head())
df.tail()


Shape: (337, 12)
Interval: 2023-11-04 18:30:00-03:00 - 2023-11-11 18:30:00-03:00


Unnamed: 0_level_0,air_temp,zenith,azimuth,ghi,ghi90,ghi10,ebh,dni,dni90,dni10,dhi,cloud_opacity
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2023-11-04 18:30:00-03:00,24,90,106,2,2,1,0,0,0,0,2,65
2023-11-04 19:00:00-03:00,23,97,108,0,0,0,0,0,0,0,0,76
2023-11-04 19:30:00-03:00,22,104,111,0,0,0,0,0,0,0,0,77
2023-11-04 20:00:00-03:00,21,111,114,0,0,0,0,0,0,0,0,76
2023-11-04 20:30:00-03:00,21,117,117,0,0,0,0,0,0,0,0,77


Unnamed: 0_level_0,air_temp,zenith,azimuth,ghi,ghi90,ghi10,ebh,dni,dni90,dni10,dhi,cloud_opacity
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2023-11-11 16:30:00-03:00,32,62,101,415,459,172,274,575,776,0,142,10
2023-11-11 17:00:00-03:00,31,69,103,289,330,107,164,449,686,0,125,13
2023-11-11 17:30:00-03:00,31,76,104,165,202,53,66,257,547,0,99,19
2023-11-11 18:00:00-03:00,30,83,106,57,86,16,4,21,327,0,54,34
2023-11-11 18:30:00-03:00,29,90,108,6,9,2,0,0,26,0,6,36
