In [1]:
import os
import numpy as np
import pickle as pkl
from openai import OpenAI

## Load Dataset

In [2]:
# Short code selecting which city's time series this notebook processes.
city = 'ny'

# Maps each short city code to the full city name used in the prompt text.
city_full_name = dict(
    ny='New York City',
    hs='Houston',
    sf='San Francisco',
)

In [3]:
def _read_pickle(path):
    """Open `path` in binary mode and return the unpickled object it holds."""
    with open(path, 'rb') as handle:
        return pkl.load(handle)


# Load the three inputs: index list, date list, and the selected city's series.
indices = _read_pickle('indices.pkl')
dates = _read_pickle('dates.pkl')
data = _read_pickle(f'time_series_{city}.pkl')

In [4]:
# Inspect the index list loaded from indices.pkl.
print(indices)

[0, 24, 48, 72, 96, 120, 144, 168, 192, 216, 240, 264, 288, 312, 336, 360, 384, 408, 432, 456, 480, 504, 528, 552, 576, 600, 624, 648, 672, 696, 720, 744, 768, 792, 816, 840, 864, 888, 912, 936, 960, 984, 1008, 1032, 1056, 1080, 1104, 1128, 1152, 1176, 1200, 1224, 1248, 1272, 1296, 1320, 1344, 1368, 1392, 1416, 1440, 1464, 1488, 1512, 1536, 1560, 1584, 1608, 1632, 1656, 1680, 1704, 1728, 1752, 1776, 1800, 1824, 1848, 1872, 1896, 1920, 1944, 1968, 1992, 2016, 2040, 2064, 2088, 2112, 2136, 2160, 2184, 2208, 2232, 2256, 2280, 2304, 2328, 2352, 2376, 2400, 2424, 2448, 2472, 2496, 2520, 2544, 2568, 2592, 2616, 2640, 2664, 2688, 2712, 2736, 2760, 2784, 2808, 2832, 2856, 2880, 2904, 2928, 2952, 2976, 3000, 3024, 3048, 3072, 3096, 3120, 3144, 3168, 3192, 3216, 3240, 3264, 3288, 3312, 3336, 3360, 3384, 3408, 3432, 3456, 3480, 3504, 3528, 3552, 3576, 3600, 3624, 3648, 3672, 3696, 3720, 3744, 3768, 3792, 3816, 3840, 3864, 3888, 3912, 3936, 3960, 3984, 4008, 4032, 4056, 4080, 4104, 4128, 4152, 417

In [5]:
# Inspect the date strings loaded from dates.pkl.
print(dates)

['2012-10-02', '2012-10-02', '2012-10-02', '2012-10-02', '2012-10-02', '2012-10-02', '2012-10-02', '2012-10-02', '2012-10-02', '2012-10-02', '2012-10-02', '2012-10-02', '2012-10-02', '2012-10-02', '2012-10-02', '2012-10-02', '2012-10-02', '2012-10-02', '2012-10-02', '2012-10-02', '2012-10-02', '2012-10-02', '2012-10-02', '2012-10-02', '2012-10-03', '2012-10-03', '2012-10-03', '2012-10-03', '2012-10-03', '2012-10-03', '2012-10-03', '2012-10-03', '2012-10-03', '2012-10-03', '2012-10-03', '2012-10-03', '2012-10-03', '2012-10-03', '2012-10-03', '2012-10-03', '2012-10-03', '2012-10-03', '2012-10-03', '2012-10-03', '2012-10-03', '2012-10-03', '2012-10-03', '2012-10-03', '2012-10-04', '2012-10-04', '2012-10-04', '2012-10-04', '2012-10-04', '2012-10-04', '2012-10-04', '2012-10-04', '2012-10-04', '2012-10-04', '2012-10-04', '2012-10-04', '2012-10-04', '2012-10-04', '2012-10-04', '2012-10-04', '2012-10-04', '2012-10-04', '2012-10-04', '2012-10-04', '2012-10-04', '2012-10-04', '2012-10-04', '2012

In [7]:
# Show the shape of the loaded time-series array, followed by its contents.
print(data.shape)
print(data)

(45216, 5)
[[  55.         1012.          289.0403111     6.          264.        ]
 [  54.         1012.          289.11957459    6.          264.        ]
 [  54.         1012.          289.19883808    6.          265.        ]
 ...
 [  58.         1020.          284.98          2.            0.        ]
 [  58.         1020.          284.98          2.            0.        ]
 [  58.         1020.          284.98          2.            0.        ]]


In [None]:
# Number of consecutive rows sliced out of `data` for each prompt.
window_size = 24
# Number of rows in the loaded time-series array.
data_size = data.shape[0]
print(data_size, window_size, len(indices))

## Prompt GPT-4

In [None]:
# Read the OpenAI key from the OPENAI_API_KEY environment variable so the
# secret never has to be typed into (and saved with) the notebook. When the
# variable is unset this falls back to the empty string, as before.
API_KEY = os.environ.get('OPENAI_API_KEY', '')

In [None]:
# System message sent with every request; it sets the analyst persona.
system_prompt = (
    "Your job is to act as a professional weather analyst. "
    "You will write a high-quality report that is informative and helps "
    "in understanding the current weather situation."
)
print(system_prompt)

In [None]:
# Create the OpenAI client, authenticated with API_KEY, used for the requests below.
client = OpenAI(api_key=API_KEY)

In [None]:
def _format_series(values):
    """Format each number with two decimals and join the results with '|'."""
    return '|'.join(f"{x:.2f}" for x in values)


def _build_user_prompt(city_name, hours, window):
    """Compose the user message describing one window of readings.

    Args:
        city_name: Full city name inserted into the prompt text.
        hours: Window length quoted in the prompt text.
        window: 2-D array slice; columns 0-4 are read, in order, as humidity,
            pressure, temperature, wind speed and wind direction.

    Returns:
        The complete user prompt string.
    """
    humidity, pressure, temperature, wind_speed, wind_direction = (
        _format_series(window[:, col]) for col in range(5)
    )
    return (
        f"Your task is to analyze key weather indicators in {city_name} over the last {hours} hours."
        f"\n\nReview the time-series data provided for the last {hours} hours. "
        "Each time-series consists of hourly values separated by a '|' token for the following indicators:\n"
        f"- Temperature (Kelvin): {temperature}\n"
        f"- Humidity (%): {humidity}\n"
        f"- Air Pressure (hPa): {pressure}\n"
        f"- Wind Speed (m/s): {wind_speed}\n"
        f"- Wind Direction (degrees): {wind_direction}\n\n"
        "Based on this time-series data, write a concise report that provides insights crucial for understanding the current weather situation. "
        "Your report should be limited to five sentences, yet comprehensive, highlighting key trends and considering their potential impact on the weather in "
        # The trailing space keeps the final instruction from fusing with this
        # sentence (the prompt previously read "...<city>.Do not write...").
        f"{city_name}. "
        "Do not write numerical values while writing the report."
    )


# Create the output folder up front so the first write cannot fail after an
# API request has already been made.
output_dir = 'gpt_summary'
os.makedirs(output_dir, exist_ok=True)

# One chat-completion request per entry in `indices`; each reply is saved to
# its own text file named after the city code and the window's start index.
for i in indices:

    data_window = data[i:i+window_size]
    user_prompt = _build_user_prompt(city_full_name[city], window_size, data_window)

    response = client.chat.completions.create(
        model="gpt-4-1106-preview",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        temperature=0.7,
        max_tokens=2048,
        top_p=1
    )

    text = response.choices[0].message.content

    # Explicit UTF-8 so non-ASCII characters in the reply are written the same
    # way on every platform instead of depending on the locale default.
    with open(f'{output_dir}/{city}_{i}.txt', 'w', encoding='utf-8') as f:
        f.write(text)