In [1]:
import datetime
import numpy as np
import pandas as pd

In [2]:
# Lift pandas' display truncation for both columns and rows so wide frames render in full.
for option_name in ("display.max_columns", "display.max_rows"):
    pd.set_option(option_name, None)

In [3]:
# Input folders, one per data source.
INPUT_DATA_ISW_FOLDER = "data/0_raw_isw"
# NOTE(review): alarms are read from the same folder OUTPUT_DATA_FOLDER writes to —
# presumably alarms.csv is produced by an earlier pipeline step; confirm.
INPUT_DATA_ALARM_FOLDER = "data/output"
INPUT_DATA_ALL_WEATHER_FOLDER = "data/0_raw_weather"
INPUT_REGIONS_DATA_FOLDER = "data/0_raw_meta"

# Input file names, joined with the folders above when reading.
ISW_DATA_FILE = "isw_reports.csv"
ALARMS_DATA_FILE = "alarms.csv"
REGIONS_DATA_FILE = "regions.csv"
ALL_WEATHER_DATA_FILE = "all_weather_by_hour.csv"


# Output folder and the names of the files this notebook writes.
OUTPUT_DATA_FOLDER = "data/output"
ISW_DATA_PREPARED_FILE = "isw_reports_prepared.csv"
ALARMS_WEATHER_MERGED_DATA_FILE = "alarms_weather_merged.csv"
ALL_MERGED_DATA_FILE = "all_merged_data.csv"



In [4]:
def isNaN(num):
    """Return the result of comparing ``num`` with itself for inequality.

    NaN-like values (float NaN, pandas NaT) compare unequal to themselves, so the
    result is truthy for them and falsy for ordinary values.
    """
    differs_from_itself = num != num
    return differs_from_itself

In [5]:
# Load the ISW reports; the file is semicolon-delimited.
isw_reports_path = f"{INPUT_DATA_ISW_FOLDER}/{ISW_DATA_FILE}"
df_isw = pd.read_csv(isw_reports_path, sep=";")

In [6]:
# Peek at the first three raw ISW rows before trimming columns.
df_isw.head(3)

Unnamed: 0,date,text,lemming,stemming,keywords
0,2022-02-24,Russian President Vladimir Putin began a large...,russian presid vladimir putin began larg scale...,russian presid vladimir putin began larg scale...,"{'pm': 0.411, 'airport': 0.281, 'kyiv': 0.234,..."
1,2022-02-25,Key Takeaways\n- Russian forces entered the ou...,key takeaway russian forc enter outskirt kyiv ...,key takeaway russian forc enter outskirt kyiv ...,"{'pm': 0.393, 'februari': 0.311, 'kyiv': 0.298..."
2,2022-02-26,Ukrainian resistance remains remarkably effect...,ukrainian resist remain remark effect russian ...,ukrainian resist remain remark effect russian ...,"{'februari': 0.398, 'kyiv': 0.364, 'twenti': 0..."


In [7]:
# Keep only the date and keyword columns; the raw and normalised text is not needed downstream.
isw_text_columns = ["text", "lemming", "stemming"]
df_isw = df_isw.drop(columns=isw_text_columns)

In [8]:
# Parse the textual report date into a proper datetime column.
parsed_report_dates = pd.to_datetime(df_isw["date"])
df_isw["date_datetime"] = parsed_report_dates

In [9]:
# Day after each report, computed with vectorized datetime arithmetic
# instead of a per-row Python lambda.
df_isw['date_tomorrow_datetime'] = df_isw['date_datetime'] + datetime.timedelta(days=1)

## Prepare ISW reports

In [10]:
# Expose the parsed date as "report_date" (the name used as a join key later on),
# then persist the prepared ISW table without the index.
isw_prepared_path = f"{OUTPUT_DATA_FOLDER}/{ISW_DATA_PREPARED_FILE}"
df_isw = df_isw.rename(columns={"date_datetime": "report_date"})
df_isw.to_csv(isw_prepared_path, sep=";", index=False)

In [11]:
# Check the prepared ISW frame: keywords plus report_date and the next-day date.
df_isw.head(3)

Unnamed: 0,date,keywords,report_date,date_tomorrow_datetime
0,2022-02-24,"{'pm': 0.411, 'airport': 0.281, 'kyiv': 0.234,...",2022-02-24,2022-02-25
1,2022-02-25,"{'pm': 0.393, 'februari': 0.311, 'kyiv': 0.298...",2022-02-25,2022-02-26
2,2022-02-26,"{'februari': 0.398, 'kyiv': 0.364, 'twenti': 0...",2022-02-26,2022-02-27


## Prepare alarms

In [12]:
# Load the alarm records; the file is semicolon-delimited.
alarms_path = f"{INPUT_DATA_ALARM_FOLDER}/{ALARMS_DATA_FILE}"
df_alarms = pd.read_csv(alarms_path, sep=";")

In [13]:
# Work on a trimmed copy without the identifier columns; df_alarms itself stays untouched.
alarm_id_columns = ["id", "region_id"]
df_alarms_v2 = df_alarms.drop(columns=alarm_id_columns)

In [14]:
# Preview the trimmed alarm records.
df_alarms_v2.head(5)

Unnamed: 0,region_title,region_city,all_region,start,end,clean_end,intersection_alarm_id,region_nums,alarm_nums_24h
0,Вінниччина,Вінниця,0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 23:41:53,,1,1.0
1,Львівщина,Львів,0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 07:15:28,,1,3.0
2,Одещина,Одеса,0,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:47:03,,1,1.0
3,Житомирщина,Житомир,0,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 09:36:36,,1,1.0
4,Вінниччина,Вінниця,0,2022-02-26 08:39:39,2022-02-26 10:42:41,2022-02-26 10:42:41,,1,3.0


In [15]:
# Parse the textual alarm boundaries into datetime columns (raw column -> parsed column).
for raw_column, parsed_column in (("start", "start_time"), ("end", "end_time")):
    df_alarms_v2[parsed_column] = pd.to_datetime(df_alarms_v2[raw_column])

In [16]:
# Snap each alarm to whole-hour boundaries: the start rounds down, the end rounds up.
# The lowercase "h" alias is used because pandas 2.2 deprecated the uppercase "H" spelling.
# NOTE(review): an alarm ending at 23:41 gets end_hour 00:00 of the next day, and the
# hour expansion further down treats end_hour as inclusive — confirm that the hour
# starting at end_hour is meant to count as an alarm hour.
df_alarms_v2["start_hour"] = df_alarms_v2["start_time"].dt.floor("h")
df_alarms_v2["end_hour"] = df_alarms_v2["end_time"].dt.ceil("h")

In [17]:
# Back-fill missing hour boundaries from an "event_hour" column when the frame has one.
# The alarms frame built above has no such column, so the previous row-wise lambda
# would raise KeyError('event_hour') on the first row with a missing boundary.
hour_boundary_columns = ["start_hour", "end_hour"]
if "event_hour" in df_alarms_v2.columns:
    for boundary_column in hour_boundary_columns:
        df_alarms_v2[boundary_column] = df_alarms_v2[boundary_column].fillna(df_alarms_v2["event_hour"])
elif df_alarms_v2[hour_boundary_columns].isna().any().any():
    # Fail with an actionable message instead of an unrelated KeyError.
    raise ValueError(
        "alarms contain missing start/end times and there is no 'event_hour' column to fall back on"
    )

In [18]:
# Check the derived start_hour / end_hour columns against the raw timestamps.
df_alarms_v2.head(5)

Unnamed: 0,region_title,region_city,all_region,start,end,clean_end,intersection_alarm_id,region_nums,alarm_nums_24h,start_time,end_time,start_hour,end_hour
0,Вінниччина,Вінниця,0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 23:41:53,,1,1.0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 22:00:00,2022-02-26 00:00:00
1,Львівщина,Львів,0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 07:15:28,,1,3.0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 06:00:00,2022-02-26 08:00:00
2,Одещина,Одеса,0,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:47:03,,1,1.0,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:00:00,2022-02-26 08:00:00
3,Житомирщина,Житомир,0,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 09:36:36,,1,1.0,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 08:00:00,2022-02-26 10:00:00
4,Вінниччина,Вінниця,0,2022-02-26 08:39:39,2022-02-26 10:42:41,2022-02-26 10:42:41,,1,3.0,2022-02-26 08:39:39,2022-02-26 10:42:41,2022-02-26 08:00:00,2022-02-26 11:00:00


In [19]:
# Calendar date on which each alarm started (stored as Python date objects).
alarm_start_dates = df_alarms_v2["start_time"].dt.date
df_alarms_v2["day_date"] = alarm_start_dates

In [20]:
def _epoch_seconds_or_none(moment):
    """Convert a timestamp to whole epoch seconds; NaN-like values map to None."""
    if isNaN(moment):
        return None
    return int(moment.timestamp())


# Epoch-second versions of the hour boundaries.
# NOTE(review): these naive timestamps are converted as if they were UTC
# (2022-02-25 22:00 -> 1645826400), while the weather hour_datetimeEpoch values
# correspond to Europe/Kiev local hours (2022-02-24 00:00 -> 1645653600). If alarm
# times are Kyiv local time, epoch-based joins will be shifted by the UTC offset —
# confirm the alarm timezone.
for hour_column in ("start_hour", "end_hour"):
    df_alarms_v2[f"{hour_column}_datetimeEpoch"] = df_alarms_v2[hour_column].apply(_epoch_seconds_or_none)

In [21]:
# Check the new day_date and epoch columns.
df_alarms_v2.head(5)

Unnamed: 0,region_title,region_city,all_region,start,end,clean_end,intersection_alarm_id,region_nums,alarm_nums_24h,start_time,end_time,start_hour,end_hour,day_date,start_hour_datetimeEpoch,end_hour_datetimeEpoch
0,Вінниччина,Вінниця,0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 23:41:53,,1,1.0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 22:00:00,2022-02-26 00:00:00,2022-02-25,1645826400,1645833600
1,Львівщина,Львів,0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 07:15:28,,1,3.0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 06:00:00,2022-02-26 08:00:00,2022-02-26,1645855200,1645862400
2,Одещина,Одеса,0,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:47:03,,1,1.0,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:00:00,2022-02-26 08:00:00,2022-02-26,1645858800,1645862400
3,Житомирщина,Житомир,0,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 09:36:36,,1,1.0,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 08:00:00,2022-02-26 10:00:00,2022-02-26,1645862400,1645869600
4,Вінниччина,Вінниця,0,2022-02-26 08:39:39,2022-02-26 10:42:41,2022-02-26 10:42:41,,1,3.0,2022-02-26 08:39:39,2022-02-26 10:42:41,2022-02-26 08:00:00,2022-02-26 11:00:00,2022-02-26,1645862400,1645873200


In [22]:
# (rows, columns) of the prepared alarms frame.
df_alarms_v2.shape

(19933, 16)

## Prepare weather

In [23]:
# Load the hourly weather table and parse its day column into datetimes.
weather_path = f"{INPUT_DATA_ALL_WEATHER_FOLDER}/{ALL_WEATHER_DATA_FILE}"
df_weather = pd.read_csv(weather_path)
parsed_weather_days = pd.to_datetime(df_weather["day_datetime"])
df_weather["day_datetime"] = parsed_weather_days

In [24]:
# (rows, columns) of the raw weather table.
df_weather.shape

(190656, 67)

In [25]:
# Preview the raw weather rows: city_*, day_* and hour_* columns.
df_weather.head(3)

Unnamed: 0,city_latitude,city_longitude,city_resolvedAddress,city_address,city_timezone,city_tzoffset,day_datetime,day_datetimeEpoch,day_tempmax,day_tempmin,day_temp,day_feelslikemax,day_feelslikemin,day_feelslike,day_dew,day_humidity,day_precip,day_precipprob,day_precipcover,day_snow,day_snowdepth,day_windgust,day_windspeed,day_winddir,day_pressure,day_cloudcover,day_visibility,day_solarradiation,day_solarenergy,day_uvindex,day_severerisk,day_sunrise,day_sunriseEpoch,day_sunset,day_sunsetEpoch,day_moonphase,day_conditions,day_description,day_icon,day_source,day_preciptype,day_stations,hour_datetime,hour_datetimeEpoch,hour_temp,hour_feelslike,hour_humidity,hour_dew,hour_precip,hour_precipprob,hour_snow,hour_snowdepth,hour_preciptype,hour_windgust,hour_windspeed,hour_winddir,hour_pressure,hour_visibility,hour_cloudcover,hour_solarradiation,hour_solarenergy,hour_uvindex,hour_severerisk,hour_conditions,hour_icon,hour_source,hour_stations
0,49.4407,32.0637,"Черкаси, Україна","Cherkasy,Ukraine",Europe/Kiev,2.0,2022-02-24,1645653600,4.9,-0.1,1.9,4.4,-0.1,1.6,-0.6,83.4,0.0,0.0,0.0,0.0,0.0,14.4,8.3,47.6,1023.2,69.4,12.2,47.4,4.4,2.0,10.0,06:44:41,1645677881,17:26:05,1645716365,0.77,Partially cloudy,Partly cloudy throughout the day.,partly-cloudy-day,obs,rain,remote,00:00:00,1645653600,0.9,0.9,75.73,-2.9,0.0,0.0,0.0,0.0,,7.2,4.0,132.0,1020.0,24.1,89.0,0.0,,0.0,10.0,Partially cloudy,partly-cloudy-night,obs,remote
1,49.4407,32.0637,"Черкаси, Україна","Cherkasy,Ukraine",Europe/Kiev,2.0,2022-02-24,1645653600,4.9,-0.1,1.9,4.4,-0.1,1.6,-0.6,83.4,0.0,0.0,0.0,0.0,0.0,14.4,8.3,47.6,1023.2,69.4,12.2,47.4,4.4,2.0,10.0,06:44:41,1645677881,17:26:05,1645716365,0.77,Partially cloudy,Partly cloudy throughout the day.,partly-cloudy-day,obs,rain,remote,01:00:00,1645657200,0.6,0.6,77.96,-2.8,0.0,0.0,0.0,0.0,,6.5,3.6,147.9,1020.0,24.1,71.1,0.0,,0.0,10.0,Partially cloudy,partly-cloudy-night,obs,remote
2,49.4407,32.0637,"Черкаси, Україна","Cherkasy,Ukraine",Europe/Kiev,2.0,2022-02-24,1645653600,4.9,-0.1,1.9,4.4,-0.1,1.6,-0.6,83.4,0.0,0.0,0.0,0.0,0.0,14.4,8.3,47.6,1023.2,69.4,12.2,47.4,4.4,2.0,10.0,06:44:41,1645677881,17:26:05,1645716365,0.77,Partially cloudy,Partly cloudy throughout the day.,partly-cloudy-day,obs,rain,remote,02:00:00,1645660800,0.4,0.4,82.08,-2.3,0.0,0.0,0.0,0.0,,8.3,3.2,198.2,1020.0,24.1,95.4,0.0,,0.0,10.0,Overcast,cloudy,obs,remote


In [26]:
# Weather columns to drop before merging: city metadata, a set of day-level
# measures and labels, and hour-level icon/source/stations/feelslike.
weather_exclude = [
"day_feelslikemax",
"day_feelslikemin",
"day_sunriseEpoch",
"day_sunsetEpoch",
"day_description",
"city_latitude",
"city_longitude",
"city_address",
"city_timezone",
"city_tzoffset",
"day_feelslike",
"day_precipprob",
"day_snow",
"day_snowdepth",
"day_windgust",
"day_windspeed",
"day_winddir",
"day_pressure",
"day_cloudcover",
"day_visibility",
"day_severerisk",
"day_conditions",
"day_icon",
"day_source",
"day_preciptype",
"day_stations",
"hour_icon",
"hour_source",
"hour_stations",
"hour_feelslike"
]

In [27]:
# Trimmed weather frame without the excluded columns; df_weather itself stays untouched.
df_weather_v2 = df_weather.drop(columns=weather_exclude)

In [28]:
# City name = text before the first comma of the resolved address.
# The vectorized .str accessor replaces the per-row lambda and leaves a missing
# address as NaN instead of raising AttributeError.
# One resolved address names the region rather than the city, so it is mapped
# to the city name explicitly.
df_weather_v2["city"] = (
    df_weather_v2["city_resolvedAddress"]
    .str.split(",", n=1)
    .str[0]
    .replace('Хмельницька область', "Хмельницький")
)

In [29]:
# Check the trimmed weather frame and the derived city column.
df_weather_v2.head(5)

Unnamed: 0,city_resolvedAddress,day_datetime,day_datetimeEpoch,day_tempmax,day_tempmin,day_temp,day_dew,day_humidity,day_precip,day_precipcover,day_solarradiation,day_solarenergy,day_uvindex,day_sunrise,day_sunset,day_moonphase,hour_datetime,hour_datetimeEpoch,hour_temp,hour_humidity,hour_dew,hour_precip,hour_precipprob,hour_snow,hour_snowdepth,hour_preciptype,hour_windgust,hour_windspeed,hour_winddir,hour_pressure,hour_visibility,hour_cloudcover,hour_solarradiation,hour_solarenergy,hour_uvindex,hour_severerisk,hour_conditions,city
0,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,00:00:00,1645653600,0.9,75.73,-2.9,0.0,0.0,0.0,0.0,,7.2,4.0,132.0,1020.0,24.1,89.0,0.0,,0.0,10.0,Partially cloudy,Черкаси
1,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,01:00:00,1645657200,0.6,77.96,-2.8,0.0,0.0,0.0,0.0,,6.5,3.6,147.9,1020.0,24.1,71.1,0.0,,0.0,10.0,Partially cloudy,Черкаси
2,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,02:00:00,1645660800,0.4,82.08,-2.3,0.0,0.0,0.0,0.0,,8.3,3.2,198.2,1020.0,24.1,95.4,0.0,,0.0,10.0,Overcast,Черкаси
3,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,03:00:00,1645664400,0.2,83.28,-2.3,0.0,0.0,0.0,0.0,,5.0,2.9,202.3,1020.0,24.1,74.6,0.0,,0.0,10.0,Partially cloudy,Черкаси
4,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,04:00:00,1645668000,0.0,83.87,-2.4,0.0,0.0,0.0,0.0,,5.0,2.9,203.6,1021.0,24.1,67.8,0.0,,0.0,10.0,Partially cloudy,Черкаси


In [30]:
# (rows, columns) after dropping the excluded columns and adding city.
df_weather_v2.shape

(190656, 38)

## Merge weather, regions and alarms

In [31]:
# Load the region reference table (comma-delimited, pandas default).
regions_path = f"{INPUT_REGIONS_DATA_FOLDER}/{REGIONS_DATA_FILE}"
df_regions = pd.read_csv(regions_path)

In [32]:
# Preview the region table: region name, centre city (UA/EN), alternative name, id.
df_regions.head(5)

Unnamed: 0,region,center_city_ua,center_city_en,region_alt,region_id
0,АР Крим,Сімферополь,Simferopol,Крим,1
1,Вінницька,Вінниця,Vinnytsia,Вінниччина,2
2,Волинська,Луцьк,Lutsk,Волинь,3
3,Дніпропетровська,Дніпро,Dnipro,Дніпропетровщина,4
4,Донецька,Донецьк,Donetsk,Донеччина,5


In [33]:
# Attach region metadata to each weather row by matching the weather city to the
# region's centre city (default inner join).
df_weather_reg = df_weather_v2.merge(
    df_regions,
    left_on="city",
    right_on="center_city_ua",
)

In [34]:
# Check that the region columns were attached to the weather rows.
df_weather_reg.head(10)

Unnamed: 0,city_resolvedAddress,day_datetime,day_datetimeEpoch,day_tempmax,day_tempmin,day_temp,day_dew,day_humidity,day_precip,day_precipcover,day_solarradiation,day_solarenergy,day_uvindex,day_sunrise,day_sunset,day_moonphase,hour_datetime,hour_datetimeEpoch,hour_temp,hour_humidity,hour_dew,hour_precip,hour_precipprob,hour_snow,hour_snowdepth,hour_preciptype,hour_windgust,hour_windspeed,hour_winddir,hour_pressure,hour_visibility,hour_cloudcover,hour_solarradiation,hour_solarenergy,hour_uvindex,hour_severerisk,hour_conditions,city,region,center_city_ua,center_city_en,region_alt,region_id
0,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,00:00:00,1645653600,0.9,75.73,-2.9,0.0,0.0,0.0,0.0,,7.2,4.0,132.0,1020.0,24.1,89.0,0.0,,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23
1,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,01:00:00,1645657200,0.6,77.96,-2.8,0.0,0.0,0.0,0.0,,6.5,3.6,147.9,1020.0,24.1,71.1,0.0,,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23
2,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,02:00:00,1645660800,0.4,82.08,-2.3,0.0,0.0,0.0,0.0,,8.3,3.2,198.2,1020.0,24.1,95.4,0.0,,0.0,10.0,Overcast,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23
3,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,03:00:00,1645664400,0.2,83.28,-2.3,0.0,0.0,0.0,0.0,,5.0,2.9,202.3,1020.0,24.1,74.6,0.0,,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23
4,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,04:00:00,1645668000,0.0,83.87,-2.4,0.0,0.0,0.0,0.0,,5.0,2.9,203.6,1021.0,24.1,67.8,0.0,,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23
5,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,05:00:00,1645671600,-0.1,83.86,-2.5,0.0,0.0,0.0,0.0,,4.3,1.8,197.5,1021.0,24.1,94.1,0.0,,0.0,10.0,Overcast,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23
6,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,06:00:00,1645675200,0.0,83.87,-2.4,0.0,0.0,0.0,0.0,,2.9,1.1,199.2,1022.0,24.1,100.0,0.0,0.0,0.0,10.0,Overcast,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23
7,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,07:00:00,1645678800,0.0,83.87,-2.4,0.0,0.0,0.0,0.0,,2.2,1.4,221.7,1023.0,0.2,97.9,0.0,0.3,0.0,10.0,Overcast,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23
8,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,08:00:00,1645682400,0.1,87.68,-1.7,0.0,0.0,0.0,0.0,,5.4,3.2,245.3,1023.0,0.1,90.8,179.4,0.6,2.0,10.0,Overcast,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23
9,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,09:00:00,1645686000,1.4,85.91,-0.7,0.0,0.0,0.0,0.0,,5.0,2.5,247.0,1024.0,0.1,73.3,42.0,0.2,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23


In [35]:
# Row count after the region merge; compare with df_weather_v2.shape in the next cell.
df_weather_reg.shape

(190656, 43)

In [36]:
# Row count before the region merge; equal counts mean no weather row was lost or duplicated.
df_weather_v2.shape

(190656, 38)

In [37]:
# Column dtypes of the prepared alarms frame (datetime and epoch columns in particular).
df_alarms_v2.dtypes

region_title                        object
region_city                         object
all_region                           int64
start                               object
end                                 object
clean_end                           object
intersection_alarm_id              float64
region_nums                          int64
alarm_nums_24h                     float64
start_time                  datetime64[ns]
end_time                    datetime64[ns]
start_hour                  datetime64[ns]
end_hour                    datetime64[ns]
day_date                            object
start_hour_datetimeEpoch             int64
end_hour_datetimeEpoch               int64
dtype: object

In [38]:
# Reminder of the alarm rows before they are expanded to one record per hour.
df_alarms_v2.head(3)

Unnamed: 0,region_title,region_city,all_region,start,end,clean_end,intersection_alarm_id,region_nums,alarm_nums_24h,start_time,end_time,start_hour,end_hour,day_date,start_hour_datetimeEpoch,end_hour_datetimeEpoch
0,Вінниччина,Вінниця,0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 23:41:53,,1,1.0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 22:00:00,2022-02-26 00:00:00,2022-02-25,1645826400,1645833600
1,Львівщина,Львів,0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 07:15:28,,1,3.0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 06:00:00,2022-02-26 08:00:00,2022-02-26,1645855200,1645862400
2,Одещина,Одеса,0,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:47:03,,1,1.0,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:00:00,2022-02-26 08:00:00,2022-02-26,1645858800,1645862400


In [39]:
# One plain dict per alarm row, plus an empty accumulator for the hour-level records.
events_by_hour = list()
events_dict = df_alarms_v2.to_dict(orient='records')

In [40]:
# Inspect one alarm record to see the keys and value types.
events_dict[0]

{'region_title': 'Вінниччина',
 'region_city': 'Вінниця',
 'all_region': 0,
 'start': '2022-02-25 22:55:42',
 'end': '2022-02-25 23:41:53',
 'clean_end': '2022-02-25 23:41:53',
 'intersection_alarm_id': nan,
 'region_nums': 1,
 'alarm_nums_24h': 1.0,
 'start_time': Timestamp('2022-02-25 22:55:42'),
 'end_time': Timestamp('2022-02-25 23:41:53'),
 'start_hour': Timestamp('2022-02-25 22:00:00'),
 'end_hour': Timestamp('2022-02-26 00:00:00'),
 'day_date': datetime.date(2022, 2, 25),
 'start_hour_datetimeEpoch': 1645826400,
 'end_hour_datetimeEpoch': 1645833600}

In [41]:
# Expand each alarm into one record per clock hour from start_hour to end_hour
# (both ends inclusive), tagging the record with that hour.
# The list is rebuilt from scratch, so re-running this cell no longer appends a
# second copy of every alarm-hour. The lowercase "1h" alias replaces "1H", which
# pandas 2.2 deprecated.
events_by_hour = [
    {**event, "hour_level_event_time": hour_mark}
    for event in events_dict
    for hour_mark in pd.date_range(start=event["start_hour"], end=event["end_hour"], freq="1h")
]

In [42]:
# Turn the hour-level records into a DataFrame (one row per alarm-hour).
df_events_v3 = pd.DataFrame(events_by_hour)

In [43]:
def _hour_mark_to_epoch(hour_mark):
    """Whole epoch seconds for an hour mark; NaN-like values map to None."""
    return None if isNaN(hour_mark) else int(hour_mark.timestamp())


# Epoch-second key for each alarm-hour, used to join against the weather hours.
df_events_v3["hour_level_event_datetimeEpoch"] = df_events_v3["hour_level_event_time"].apply(_hour_mark_to_epoch)

In [44]:
# (rows, columns) of the hour-level alarm frame.
df_events_v3.shape

(58860, 18)

In [45]:
# Check the expansion: each alarm repeats once per hour between start_hour and end_hour.
df_events_v3.head(15)

Unnamed: 0,region_title,region_city,all_region,start,end,clean_end,intersection_alarm_id,region_nums,alarm_nums_24h,start_time,end_time,start_hour,end_hour,day_date,start_hour_datetimeEpoch,end_hour_datetimeEpoch,hour_level_event_time,hour_level_event_datetimeEpoch
0,Вінниччина,Вінниця,0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 23:41:53,,1,1.0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 22:00:00,2022-02-26 00:00:00,2022-02-25,1645826400,1645833600,2022-02-25 22:00:00,1645826400
1,Вінниччина,Вінниця,0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 23:41:53,,1,1.0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 22:00:00,2022-02-26 00:00:00,2022-02-25,1645826400,1645833600,2022-02-25 23:00:00,1645830000
2,Вінниччина,Вінниця,0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 23:41:53,,1,1.0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 22:00:00,2022-02-26 00:00:00,2022-02-25,1645826400,1645833600,2022-02-26 00:00:00,1645833600
3,Львівщина,Львів,0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 07:15:28,,1,3.0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 06:00:00,2022-02-26 08:00:00,2022-02-26,1645855200,1645862400,2022-02-26 06:00:00,1645855200
4,Львівщина,Львів,0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 07:15:28,,1,3.0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 06:00:00,2022-02-26 08:00:00,2022-02-26,1645855200,1645862400,2022-02-26 07:00:00,1645858800
5,Львівщина,Львів,0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 07:15:28,,1,3.0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 06:00:00,2022-02-26 08:00:00,2022-02-26,1645855200,1645862400,2022-02-26 08:00:00,1645862400
6,Одещина,Одеса,0,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:47:03,,1,1.0,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:00:00,2022-02-26 08:00:00,2022-02-26,1645858800,1645862400,2022-02-26 07:00:00,1645858800
7,Одещина,Одеса,0,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:47:03,,1,1.0,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:00:00,2022-02-26 08:00:00,2022-02-26,1645858800,1645862400,2022-02-26 08:00:00,1645862400
8,Житомирщина,Житомир,0,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 09:36:36,,1,1.0,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 08:00:00,2022-02-26 10:00:00,2022-02-26,1645862400,1645869600,2022-02-26 08:00:00,1645862400
9,Житомирщина,Житомир,0,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 09:36:36,,1,1.0,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 08:00:00,2022-02-26 10:00:00,2022-02-26,1645862400,1645869600,2022-02-26 09:00:00,1645866000


In [46]:
# Reminder of the weather-with-region rows that the alarm hours are joined onto.
df_weather_reg.head(5)

Unnamed: 0,city_resolvedAddress,day_datetime,day_datetimeEpoch,day_tempmax,day_tempmin,day_temp,day_dew,day_humidity,day_precip,day_precipcover,day_solarradiation,day_solarenergy,day_uvindex,day_sunrise,day_sunset,day_moonphase,hour_datetime,hour_datetimeEpoch,hour_temp,hour_humidity,hour_dew,hour_precip,hour_precipprob,hour_snow,hour_snowdepth,hour_preciptype,hour_windgust,hour_windspeed,hour_winddir,hour_pressure,hour_visibility,hour_cloudcover,hour_solarradiation,hour_solarenergy,hour_uvindex,hour_severerisk,hour_conditions,city,region,center_city_ua,center_city_en,region_alt,region_id
0,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,00:00:00,1645653600,0.9,75.73,-2.9,0.0,0.0,0.0,0.0,,7.2,4.0,132.0,1020.0,24.1,89.0,0.0,,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23
1,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,01:00:00,1645657200,0.6,77.96,-2.8,0.0,0.0,0.0,0.0,,6.5,3.6,147.9,1020.0,24.1,71.1,0.0,,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23
2,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,02:00:00,1645660800,0.4,82.08,-2.3,0.0,0.0,0.0,0.0,,8.3,3.2,198.2,1020.0,24.1,95.4,0.0,,0.0,10.0,Overcast,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23
3,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,03:00:00,1645664400,0.2,83.28,-2.3,0.0,0.0,0.0,0.0,,5.0,2.9,202.3,1020.0,24.1,74.6,0.0,,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23
4,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,04:00:00,1645668000,0.0,83.87,-2.4,0.0,0.0,0.0,0.0,,5.0,2.9,203.6,1021.0,24.1,67.8,0.0,,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23


In [47]:
# Row count before the alarm merge, for comparison with df_weather_v4.shape afterwards.
df_weather_reg.shape

(190656, 43)

In [48]:
# Prefix every alarm column with "event_" so the names cannot collide with weather columns.
# add_prefix already returns a new frame, so the extra .copy() was redundant.
df_events_v4 = df_events_v3.add_prefix('event_')

In [49]:
# Left-join alarm hours onto weather hours by (region name, hour epoch); weather
# hours without an alarm keep NaN/NaT in the event_* columns.
# The row count grows from 190656 to 203414 here, i.e. some weather hours match
# more than one alarm-hour record.
# NOTE(review): the weather epochs correspond to Europe/Kiev local hours while the
# alarm epochs were derived from naive timestamps treated as UTC — confirm the
# alarm timezone, otherwise matches are shifted by the UTC offset.
weather_join_keys = ["region_alt", "hour_datetimeEpoch"]
alarm_join_keys = ["event_region_title", "event_hour_level_event_datetimeEpoch"]
df_weather_v4 = pd.merge(
    df_weather_reg,
    df_events_v4,
    how="left",
    left_on=weather_join_keys,
    right_on=alarm_join_keys,
)

In [50]:
# Persist the weather + alarm table.
# NOTE(review): unlike the other exports in this notebook, this one writes the
# index as an unnamed first column — confirm downstream readers expect it.
alarms_weather_path = f"{OUTPUT_DATA_FOLDER}/{ALARMS_WEATHER_MERGED_DATA_FILE}"
df_weather_v4.to_csv(alarms_weather_path, sep=";")

In [51]:
# Check the merged frame; hours without an alarm show NaN/NaT in the event_* columns.
df_weather_v4.head(10)

Unnamed: 0,city_resolvedAddress,day_datetime,day_datetimeEpoch,day_tempmax,day_tempmin,day_temp,day_dew,day_humidity,day_precip,day_precipcover,day_solarradiation,day_solarenergy,day_uvindex,day_sunrise,day_sunset,day_moonphase,hour_datetime,hour_datetimeEpoch,hour_temp,hour_humidity,hour_dew,hour_precip,hour_precipprob,hour_snow,hour_snowdepth,hour_preciptype,hour_windgust,hour_windspeed,hour_winddir,hour_pressure,hour_visibility,hour_cloudcover,hour_solarradiation,hour_solarenergy,hour_uvindex,hour_severerisk,hour_conditions,city,region,center_city_ua,center_city_en,region_alt,region_id,event_region_title,event_region_city,event_all_region,event_start,event_end,event_clean_end,event_intersection_alarm_id,event_region_nums,event_alarm_nums_24h,event_start_time,event_end_time,event_start_hour,event_end_hour,event_day_date,event_start_hour_datetimeEpoch,event_end_hour_datetimeEpoch,event_hour_level_event_time,event_hour_level_event_datetimeEpoch
0,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,00:00:00,1645653600,0.9,75.73,-2.9,0.0,0.0,0.0,0.0,,7.2,4.0,132.0,1020.0,24.1,89.0,0.0,,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23,,,,,,,,,,NaT,NaT,NaT,NaT,,,,NaT,
1,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,01:00:00,1645657200,0.6,77.96,-2.8,0.0,0.0,0.0,0.0,,6.5,3.6,147.9,1020.0,24.1,71.1,0.0,,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23,,,,,,,,,,NaT,NaT,NaT,NaT,,,,NaT,
2,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,02:00:00,1645660800,0.4,82.08,-2.3,0.0,0.0,0.0,0.0,,8.3,3.2,198.2,1020.0,24.1,95.4,0.0,,0.0,10.0,Overcast,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23,,,,,,,,,,NaT,NaT,NaT,NaT,,,,NaT,
3,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,03:00:00,1645664400,0.2,83.28,-2.3,0.0,0.0,0.0,0.0,,5.0,2.9,202.3,1020.0,24.1,74.6,0.0,,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23,,,,,,,,,,NaT,NaT,NaT,NaT,,,,NaT,
4,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,04:00:00,1645668000,0.0,83.87,-2.4,0.0,0.0,0.0,0.0,,5.0,2.9,203.6,1021.0,24.1,67.8,0.0,,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23,,,,,,,,,,NaT,NaT,NaT,NaT,,,,NaT,
5,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,05:00:00,1645671600,-0.1,83.86,-2.5,0.0,0.0,0.0,0.0,,4.3,1.8,197.5,1021.0,24.1,94.1,0.0,,0.0,10.0,Overcast,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23,,,,,,,,,,NaT,NaT,NaT,NaT,,,,NaT,
6,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,06:00:00,1645675200,0.0,83.87,-2.4,0.0,0.0,0.0,0.0,,2.9,1.1,199.2,1022.0,24.1,100.0,0.0,0.0,0.0,10.0,Overcast,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23,,,,,,,,,,NaT,NaT,NaT,NaT,,,,NaT,
7,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,07:00:00,1645678800,0.0,83.87,-2.4,0.0,0.0,0.0,0.0,,2.2,1.4,221.7,1023.0,0.2,97.9,0.0,0.3,0.0,10.0,Overcast,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23,,,,,,,,,,NaT,NaT,NaT,NaT,,,,NaT,
8,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,08:00:00,1645682400,0.1,87.68,-1.7,0.0,0.0,0.0,0.0,,5.4,3.2,245.3,1023.0,0.1,90.8,179.4,0.6,2.0,10.0,Overcast,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23,,,,,,,,,,NaT,NaT,NaT,NaT,,,,NaT,
9,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,09:00:00,1645686000,1.4,85.91,-0.7,0.0,0.0,0.0,0.0,,5.0,2.5,247.0,1024.0,0.1,73.3,42.0,0.2,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23,,,,,,,,,,NaT,NaT,NaT,NaT,,,,NaT,


In [52]:
# Row count after the alarm merge; it exceeds df_weather_reg's wherever an hour matched several alarm records.
df_weather_v4.shape

(203414, 61)

## Merge weather, alarms and ISW keywords

In [53]:
# Attach the ISW keywords to every weather/alarm row whose day equals the report date.
# NOTE(review): date_tomorrow_datetime is computed earlier but is not used as a join
# key anywhere in this notebook; joining on report_date pairs each day with the
# report dated that same day — confirm whether the previous day's report was intended.
df_wak = pd.merge(
    df_weather_v4,
    df_isw,
    how="left",
    left_on="day_datetime",
    right_on="report_date",
)

In [54]:
# Row count should match df_weather_v4's: the left merge only adds the ISW columns
# as long as there is at most one report per date.
df_wak.shape

(203414, 65)

In [55]:
# Persist the final weather + alarm + ISW keyword table without the index.
all_merged_path = f"{OUTPUT_DATA_FOLDER}/{ALL_MERGED_DATA_FILE}"
df_wak.to_csv(all_merged_path, sep=";", index=False)