### Noise Data Final Cleaning

In [40]:
import pandas as pd

In [41]:
# Load the noise measurements CSV into a DataFrame.
# NOTE(review): the save cell at the end of this section writes to this same path,
# so after a full run this file holds the reworked columns rather than the
# original ones — confirm that overwriting the input is intended.
df_noise = pd.read_csv('EDA_weather/data/final_noise_data.csv')

In [42]:
# Sort the rows by the `description` column, then by `result_timestamp`.
# `result_timestamp` is still text at this point (a later cell slices it with
# `.str`), so the ordering is lexicographic. The index is not reset, so rows
# keep their pre-sort index labels.
df_noise = df_noise.sort_values(by=['description', 'result_timestamp'])

In [46]:
# Preview the first rows of the noise DataFrame.
# NOTE(review): the stored output below already shows the reworked columns
# (location, year, month, ...), i.e. this cell was last executed after the
# column-rework cells that follow it — re-run top to bottom to confirm the order.
df_noise.head()

Unnamed: 0,location,year,month,day_month,day_week,10_min_interval_start_time,lamax,laeq,lceq,lcpeak
17856,MP 01: Naamsestraat 35 Maxim,2022,2,28,Monday,08:20,61.254569,59.892893,70.729188,82.077462
17857,MP 01: Naamsestraat 35 Maxim,2022,2,28,Monday,08:30,56.527,54.629,65.91175,77.521717
17858,MP 01: Naamsestraat 35 Maxim,2022,2,28,Monday,08:40,59.563057,57.756369,65.896815,77.866975
17859,MP 01: Naamsestraat 35 Maxim,2022,2,28,Monday,08:50,,,,
17860,MP 01: Naamsestraat 35 Maxim,2022,2,28,Monday,09:00,,,,


In [44]:
# Split the timestamp text into calendar columns and bring the frame into its
# final layout. Each step returns a new frame instead of mutating in place.
df_noise = (
    df_noise
    # discard the existing month column; it is rebuilt from the timestamp below
    .drop(columns=['month'])
    # `description` is renamed to `location`
    .rename(columns={'description': 'location'})
    .assign(**{
        # character slices of the timestamp text: [:4] year, [5:7] month,
        # [8:10] day of month, [11:16] start time of the interval
        'year': lambda frame: frame['result_timestamp'].str[:4],
        'month': lambda frame: frame['result_timestamp'].str[5:7],
        'day_month': lambda frame: frame['result_timestamp'].str[8:10],
        '10_min_interval_start_time': lambda frame: frame['result_timestamp'].str[11:16],
        # the weekday name needs an actual datetime parse
        'day_week': lambda frame: pd.to_datetime(frame['result_timestamp']).dt.day_name(),
    })
    # the raw timestamp is dropped once its parts have been extracted
    .drop(columns=['result_timestamp'])
)
# Keep exactly these columns, in this order.
df_noise = df_noise[[
    'location',
    'year',
    'month',
    'day_month',
    'day_week',
    '10_min_interval_start_time',
    'lamax',
    'laeq',
    'lceq',
    'lcpeak',
]]

In [45]:
# Convert the calendar columns that were sliced out of the timestamp text
# (year, month, day_month) to numeric dtypes.
for date_part in ('year', 'month', 'day_month'):
    df_noise[date_part] = pd.to_numeric(df_noise[date_part])

# Show the dtype of each column. This is the last expression in the cell so
# that Jupyter actually renders it (a bare expression mid-cell is not displayed),
# and it reflects the result of the conversion above.
df_noise.dtypes

In [47]:
# Save the DataFrame as a CSV file, without writing the index.
# NOTE(review): this is the same path the first cell of this section reads from,
# so the input file is overwritten. A later run from a fresh kernel would load a
# file without the `description` / `result_timestamp` columns that the sort and
# column-rework cells rely on — confirm this is intended or write to a new path.
df_noise.to_csv('EDA_weather/data/final_noise_data.csv', index=False)

### Weather Data: create final file + locations map

In [40]:
# Load the weather measurements CSV (Weather_cleaned.csv) into a DataFrame.
df_weather=pd.read_csv('EDA_weather/data/weather_data/Weather_cleaned.csv')

In [41]:
# Preview the first rows of the weather DataFrame.
df_weather.head()

Unnamed: 0,DATEUTC,ID,LC_HUMIDITY,LC_DWPTEMP,LC_n,LC_RAD,LC_RAININ,LC_DAILYRAIN,LC_WINDDIR,LC_WINDSPEED,LC_RAD60,LC_TEMP
0,2022-01-01 00:10:00,2,92,11.78,38,0,0.0,0.0,-169.0,0.43,0.0,13.048027
1,2022-01-01 00:20:00,2,92,11.73,37,0,0.0,0.0,-170.0,0.33,0.0,12.985849
2,2022-01-01 00:30:00,2,92,11.73,38,0,0.0,0.0,-167.0,0.46,0.0,12.950322
3,2022-01-01 00:40:00,2,92,11.72,37,0,0.0,0.0,-160.0,0.52,0.0,12.94955
4,2022-01-01 00:50:00,2,92,11.72,38,0,0.0,0.0,-166.0,0.51,0.0,12.952268


In [42]:
# Load the weather-station metadata CSV; its LAT / LON / ID columns are used
# for the station map further down.
df_meta = pd.read_csv('EDA_weather/data/weather_data/01_Metadata_v2.csv')

In [43]:
# Preview the first rows of the station metadata.
df_meta.head()

Unnamed: 0,ID,LAT,LON,ALT,Local_climate_zone,Landcover,building_height,Sky_view_factor,installation_height
0,LC-002,50.847,4.756,47,9,1,4.28,0.610367,2
1,LC-003,50.87,4.728,44,5,1,5.038286,0.691315,2
2,LC-004,50.871,4.685,31,9,1,9.050964,0.798586,2
3,LC-005,50.881,4.713,26,2,1,5.05,0.68541,2
4,LC-006,50.912,4.716,13,6,1,0.0,0.954032,2


In [47]:
# Show a map of the stations; their coordinates are in df_meta (LAT / LON columns).
import folium
from folium.plugins import MarkerCluster

# Create the map, starting in Leuven, Belgium.
m = folium.Map(location=[50.8798, 4.7005], zoom_start=10)

# Add a blue marker for every station, with the station ID as popup text.
# Walking the three columns together avoids building a full row Series three
# times per station, as the index-based `.iloc[i][...]` lookups did.
for station_lat, station_lon, station_id in zip(df_meta['LAT'], df_meta['LON'], df_meta['ID']):
    folium.Marker(
        location=[station_lat, station_lon],
        popup=station_id,
        icon=folium.Icon(color='blue', icon='info-sign'),
    ).add_to(m)

In [48]:
# Street addresses of the noise sensors, one per entry; a later cell geocodes
# them in this order.
noise_sensors = [
    'Naamsestraat 35, 3000 Leuven',
    'Naamsestraat 57, 3000 Leuven',
    'Naamsestraat 62, 3000 Leuven',
    'Naamsestraat 76, 3000 Leuven',
    'Naamsestraat 81, 3000 Leuven',
    'Naamsestraat 80, 3000 Leuven',
    'Parkstraat 1, 3000 Leuven',
]

In [49]:
# Use geopy to build a list of (latitude, longitude) tuples for the addresses
# in the list noise_sensors, in the same order as that list.
import geopy
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

geopy.geocoders.options.default_user_agent = "my-application"
# NOTE(review): the explicit user_agent below takes precedence over the default
# set above; consider a more descriptive application name for the Nominatim service.
geolocator = Nominatim(user_agent="http")
# Space consecutive requests at least one second apart so the loop does not
# hammer the public Nominatim service.
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)

coordinates = []
for address in noise_sensors:
    print(address)
    location = geocode(address)
    if location is None:
        # No match was found (or the rate limiter gave up after its retries).
        # Stop with the offending address instead of an AttributeError on
        # `None.latitude`; skipping silently would misalign `coordinates`
        # with `noise_sensors`.
        raise ValueError(f"Could not geocode address: {address!r}")
    coordinates.append((location.latitude, location.longitude))
print(coordinates)


Naamsestraat 35, 3000 Leuven
Naamsestraat 57, 3000 Leuven
Naamsestraat 62, 3000 Leuven
Naamsestraat 76, 3000 Leuven
Naamsestraat 81, 3000 Leuven
Naamsestraat 80, 3000 Leuven
Parkstraat 1, 3000 Leuven
[(50.8771209, 4.7007076), (50.87627195, 4.70101516463468), (50.8778231, 4.7006761), (50.875237299999995, 4.700070765091863), (50.8738083, 4.700044226810585), (50.87488945, 4.699574800427602), (50.87428215, 4.700127779155729)]


In [50]:
# Add the geocoded noise-sensor coordinates to the station map as red markers,
# using the address as popup text.
for (sensor_lat, sensor_lon), sensor_address in zip(coordinates, noise_sensors):
    folium.Marker(
        location=[sensor_lat, sensor_lon],
        popup=sensor_address,
        icon=folium.Icon(color='red', icon='info-sign'),
    ).add_to(m)

# Also add the following locations, given directly as coordinates:
# (50.87527, 4.70149) and (50.87870, 4.70111).
extra_locations = [
    ('Kiosk City Park', 50.87527, 4.70149),
    ('Vrijthof', 50.87870, 4.70111),
]
# One loop instead of a copy-pasted Marker block per location. The cell ends on
# a loop rather than an expression, so no stray Marker repr is displayed.
for place_name, place_lat, place_lon in extra_locations:
    folium.Marker(
        location=[place_lat, place_lon],
        popup=place_name,
        icon=folium.Icon(color='red', icon='info-sign'),
    ).add_to(m)

<folium.map.Marker at 0x137731b40>

In [52]:
# Export the map, including all markers added in the cells above, as an HTML file.
m.save('EDA_weather/data/weather_data/sensors_map.html')