# Live Daten einlesen

In [229]:
import pandas as pd
import json

json_file_path = "./data/msr.json"

# read JSON
with open(json_file_path, 'r') as file:
    data = json.load(file)

# create an empty list
df_list = []

# get out all sensorMeasurements data and adds it to the rest of the detector measurements
for entry in data['detector_measurements']:
    measurements_data = entry['sensorMeasurements']
    entry.pop('sensorMeasurements')  # delete 'sensorMeasurements'(we only need each measurement)
    for measurement in measurements_data:
        entry.update(measurement)  # add details of sensorMeasurements entry
        df_list.append(entry.copy())  # append copy of entry to list

# create DataFrame
df = pd.DataFrame(df_list)

# show DataFrame
df

Unnamed: 0,id,time,value,hasError,errorReason,index,kind,numberOfInputValuesUsed
0,CH:0542.05,2023-12-02T12:37:00.000000Z,0.000000,True,VD_OFFLINE,0,,
1,CH:0026.03,2023-12-02T12:37:00.000000Z,1320.000000,False,,11,trafficFlow,
2,CH:0026.03,2023-12-02T12:37:00.000000Z,96.699997,False,,12,trafficSpeed,22.0
3,CH:0026.03,2023-12-02T12:37:00.000000Z,0.000000,False,,21,trafficFlow,22.0
4,CH:0026.03,2023-12-02T12:37:00.000000Z,0.000000,False,,1,trafficFlow,22.0
...,...,...,...,...,...,...,...,...
6342,CH:0505.03,2023-12-02T12:37:00.000000Z,0.000000,False,,1,trafficFlow,22.0
6343,CH:0505.02,2023-12-02T12:37:00.000000Z,900.000000,False,,11,trafficFlow,
6344,CH:0505.02,2023-12-02T12:37:00.000000Z,115.300003,False,,12,trafficSpeed,15.0
6345,CH:0505.02,2023-12-02T12:37:00.000000Z,0.000000,False,,21,trafficFlow,15.0


In [230]:
# make sure that time is a datetime
df['time'] = pd.to_datetime(df['time'])
df

Unnamed: 0,id,time,value,hasError,errorReason,index,kind,numberOfInputValuesUsed
0,CH:0542.05,2023-12-02 12:37:00+00:00,0.000000,True,VD_OFFLINE,0,,
1,CH:0026.03,2023-12-02 12:37:00+00:00,1320.000000,False,,11,trafficFlow,
2,CH:0026.03,2023-12-02 12:37:00+00:00,96.699997,False,,12,trafficSpeed,22.0
3,CH:0026.03,2023-12-02 12:37:00+00:00,0.000000,False,,21,trafficFlow,22.0
4,CH:0026.03,2023-12-02 12:37:00+00:00,0.000000,False,,1,trafficFlow,22.0
...,...,...,...,...,...,...,...,...
6342,CH:0505.03,2023-12-02 12:37:00+00:00,0.000000,False,,1,trafficFlow,22.0
6343,CH:0505.02,2023-12-02 12:37:00+00:00,900.000000,False,,11,trafficFlow,
6344,CH:0505.02,2023-12-02 12:37:00+00:00,115.300003,False,,12,trafficSpeed,15.0
6345,CH:0505.02,2023-12-02 12:37:00+00:00,0.000000,False,,21,trafficFlow,15.0


In [231]:
# index mapping to vehicle type

#sk: Vehicle Type würde ich nicht in die Live Daten integrieren, sondern eine im Filter hinterlegen: wenn "Lorry" ausgewählt ist, dann nimm alle Zeilen mit index 21 und index 22.. etc.

In [232]:
# add canton

# get measurement_station id
df['id_measurement_station'] = df['id'].str.split('.').str[0]

# read mst data
json_file_path = "./data/mst.json"
with open(json_file_path, 'r') as file:
    json_data = json.load(file)

# Create mapping
canton_mapping = {entry['id']: entry['canton'] for entry in json_data}
eastLv95_mapping = {entry['id']: entry['eastLv95'] for entry in json_data}
northLv95_mapping = {entry['id']: entry['northLv95'] for entry in json_data}

# Map 'canton' values to the DataFrame
df['canton'] = df['id_measurement_station'].map(canton_mapping)

df['eastLv95'] = df['id_measurement_station'].map(eastLv95_mapping)
df['eastLv95'] = df['eastLv95'].astype(float)

df['northLv95'] = df['id_measurement_station'].map(northLv95_mapping )
df['northLv95'] = df['northLv95'].astype(float)

# Display the updated DataFrame
df

Unnamed: 0,id,time,value,hasError,errorReason,index,kind,numberOfInputValuesUsed,id_measurement_station,canton,eastLv95,northLv95
0,CH:0542.05,2023-12-02 12:37:00+00:00,0.000000,True,VD_OFFLINE,0,,,CH:0542,AG,2668970.0,1254246.0
1,CH:0026.03,2023-12-02 12:37:00+00:00,1320.000000,False,,11,trafficFlow,,CH:0026,LU,2666381.0,1205794.0
2,CH:0026.03,2023-12-02 12:37:00+00:00,96.699997,False,,12,trafficSpeed,22.0,CH:0026,LU,2666381.0,1205794.0
3,CH:0026.03,2023-12-02 12:37:00+00:00,0.000000,False,,21,trafficFlow,22.0,CH:0026,LU,2666381.0,1205794.0
4,CH:0026.03,2023-12-02 12:37:00+00:00,0.000000,False,,1,trafficFlow,22.0,CH:0026,LU,2666381.0,1205794.0
...,...,...,...,...,...,...,...,...,...,...,...,...
6342,CH:0505.03,2023-12-02 12:37:00+00:00,0.000000,False,,1,trafficFlow,22.0,CH:0505,VD,2507348.0,1141026.0
6343,CH:0505.02,2023-12-02 12:37:00+00:00,900.000000,False,,11,trafficFlow,,CH:0505,VD,2507348.0,1141026.0
6344,CH:0505.02,2023-12-02 12:37:00+00:00,115.300003,False,,12,trafficSpeed,15.0,CH:0505,VD,2507348.0,1141026.0
6345,CH:0505.02,2023-12-02 12:37:00+00:00,0.000000,False,,21,trafficFlow,15.0,CH:0505,VD,2507348.0,1141026.0


# Clusterings - a try

In [233]:
# tranform Lv95 to WGS84
from pyproj import Proj, transform

lv95 = Proj(init='epsg:2056')  # Lv95
wgs84 = Proj(init='epsg:4326')  # WGS84
df['lon'], df['lat'] = transform(lv95, wgs84, df['eastLv95'].values, df['northLv95'].values)


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  df['lon'], df['lat'] = transform(lv95, wgs84, df['eastLv95'].values, df['northLv95'].values)


In [243]:
# I want to draw the dots just once. This is not a final solution (only for displaying the different clustrings)
df_short = df.drop_duplicates(subset='id_measurement_station')
df_short

Unnamed: 0,id,time,value,hasError,errorReason,index,kind,numberOfInputValuesUsed,id_measurement_station,canton,eastLv95,northLv95,lon,lat
0,CH:0542.05,2023-12-02 12:37:00+00:00,0.0,True,VD_OFFLINE,0,,,CH:0542,AG,2668970.0,1254246.0,8.352890,47.435399
1,CH:0026.03,2023-12-02 12:37:00+00:00,1320.0,False,,11,trafficFlow,,CH:0026,LU,2666381.0,1205794.0,8.311432,46.999884
5,CH:0301.03,2023-12-02 12:37:00+00:00,1440.0,False,,11,trafficFlow,,CH:0301,ZH,2674350.0,1246800.0,8.422963,47.367848
35,CH:0668.01,2023-12-02 12:37:00+00:00,180.0,False,,11,trafficFlow,,CH:0668,VS,2561300.0,1121840.0,6.936855,46.246876
51,CH:0141.01,2023-12-02 12:37:00+00:00,180.0,False,,11,trafficFlow,,CH:0141,TG,2731748.0,1267622.0,9.188821,47.546099
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6292,CH:0270.05,2023-12-02 12:37:00+00:00,0.0,True,VD_OFFLINE,0,,,CH:0270,ZH,2685589.0,1251824.0,8.572700,47.411666
6294,CH:0363.01,2023-12-02 12:37:00+00:00,180.0,False,,11,trafficFlow,,CH:0363,BE,2624175.0,1232976.0,7.757955,47.247258
6301,CH:0505.04,2023-12-02 12:37:00+00:00,1080.0,False,,11,trafficFlow,,CH:0505,VD,2507348.0,1141026.0,6.233557,46.414169
6309,CH:0318.04,2023-12-02 12:37:00+00:00,540.0,False,,11,trafficFlow,,CH:0318,SG,2751449.0,1212623.0,9.431870,47.047327


## canton

In [226]:


import folium
from folium.plugins import MarkerCluster
from folium import FeatureGroup, LayerControl
from IPython.display import display

center_lat = df_short['lat'].mean()
center_lon = df_short['lon'].mean()

m = folium.Map(location=[center_lat, center_lon], zoom_start=8)

# group markers
# marker_cluster = MarkerCluster().add_to(m)
layer_control = LayerControl().add_to(m)

for index, row in df_short.iterrows():

    folium.CircleMarker(
        location= [row['lat'], row['lon']],
        radius=4,
        color='grey',
        fill=True,
        fill_color='grey',
        fill_opacity=0.4,
        popup=row['canton']
    ).add_to(m)
m.save('map_canton.html')
display(m)

## k-means

In [244]:
from sklearn.cluster import KMeans


desired_number_of_clusters = 13
kmeans = KMeans(n_clusters=desired_number_of_clusters)
df_short['cluster_kmeans'] = kmeans.fit_predict(df_short[['eastLv95', 'northLv95']])

  super()._check_params_vs_input(X, default_n_init=10)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_short['cluster_kmeans'] = kmeans.fit_predict(df_short[['eastLv95', 'northLv95']])


In [237]:
center_lat = df_short['lat'].mean()
center_lon = df_short['lon'].mean()

m = folium.Map(location=[center_lat, center_lon], zoom_start=8)

# group markers
# marker_cluster = MarkerCluster().add_to(m)
layer_control = LayerControl().add_to(m)

#add colors
colors_catalog = {
    1: 'darkblue', 2:'blue', 3: 'darkred',4:  'orange', 5: 'purple', 6: 'gray', 7: 'cadetblue',8: 'lightgreen',9: 'darkgreen', 10:'black',11: 'pink',12:'white', 0:'red'
}

for index, row in df_short.iterrows():
    color = colors_catalog.get(row['cluster_kmeans'], '#808080')

    folium.CircleMarker(
        location= [row['lat'], row['lon']],
        radius=4,
        color=color,
        fill=True,
        fill_color=color,
        fill_opacity=1,
        popup=row['cluster_kmeans']
    ).add_to(m)
m.save('map_k-means.html')
display(m)