In [1]:
import tarfile
import pandas as pd
from zipfile import ZipFile
from typing import List

# data visualization
import folium
import branca.colormap as cm

## Traffic Data Preprocessing

In [2]:
def extract_csv_files(archive_path: str, headers: List[str]) -> pd.DataFrame:
    """Read every file stored in an archive as CSV and stack the results.

    Args:
        archive_path: Path to a ``.tar.gz`` or ``.zip`` archive of CSV files.
        headers: Column names assigned to the combined frame. A falsy value
            (``None`` or an empty list) leaves the parsed labels untouched.

    Returns:
        The row-wise concatenation of all files found in the archive.

    Raises:
        ValueError: If ``archive_path`` has an unsupported extension. pandas
            also raises ``ValueError`` when the archive contains no files.
    """
    if archive_path.endswith(".tar.gz"):
        with tarfile.open(archive_path, "r:gz") as tar:
            # Tarball members are parsed with their first row as the header.
            # Directory members are skipped: extractfile() returns None for
            # them, which read_csv cannot consume.
            frames = [pd.read_csv(tar.extractfile(member), encoding="latin-1")
                      for member in tar.getmembers()
                      if not member.isdir()]
    elif archive_path.endswith(".zip"):
        # The context manager guarantees the archive handle is released.
        with ZipFile(archive_path) as zip_file:
            # Zip members are parsed as header-less CSV; directory entries
            # hold no data and are skipped.
            frames = [pd.read_csv(zip_file.open(member), header=None)
                      for member in zip_file.infolist()
                      if not member.is_dir()]
    else:
        # Fail loudly instead of hitting an unbound local further down.
        raise ValueError(
            f"Unsupported archive type (expected .tar.gz or .zip): {archive_path}")

    output = pd.concat(frames)

    if headers:
        output.columns = headers

    return output

In [3]:
# Raw CityPulse traffic archives for Aarhus; one is a tarball, the other a zip.
traffic_file_archives = [
    "../raw_datasets/citypulse_traffic_raw_data_aarhus_aug_sep_2014.tar.gz",
    "../raw_datasets/citypulse_traffic_raw_data_aarhus_oct_nov_2014.zip",
]

# Column names applied to every archive's combined frame.
headers = [
    "status",
    "avg_measured_time",
    "avg_speed",
    "ext_id",
    "median_measured_time",
    "timestamp",
    "vehicle_count",
    "_id",
    "report_id",
]

# Stack the observations of all archives into a single frame.
traffic_data = pd.concat(
    extract_csv_files(archive, headers) for archive in traffic_file_archives
)

# Per-report metadata, fetched over HTTP on every run of this cell.
traffic_meta_data = pd.read_csv(
    "http://iot.ee.surrey.ac.uk:8080/datasets/traffic/trafficMetaData.csv",
    sep=",",
)

# Left join keeps every observation even when no metadata row matches.
traffic_data = traffic_data.merge(
    traffic_meta_data,
    how="left",
    left_on="report_id",
    right_on="REPORT_ID",
)

# Parse timestamps so later cells can filter on datetime values.
traffic_data["timestamp"] = pd.to_datetime(traffic_data["timestamp"])

In [4]:
# Shared colour scale: branca's linear RdYlBu_11 palette rescaled to the [0, 1] range.
linear = cm.linear.RdYlBu_11.scale(0, 1)

def add_lines(point_a, point_b, size, alpha, id, speed, map) -> None:
    """Draw a single straight segment between two points on a folium map.

    Args:
        point_a: First endpoint, passed to folium as a location.
        point_b: Second endpoint, passed to folium as a location.
        size: Value shown in the tooltip only.
        alpha: Value looked up (as ``1 - alpha``) in the module-level
            ``linear`` colormap to pick the segment colour.
        id: Identifier shown in the tooltip.
        speed: Value shown in the tooltip only.
        map: Object the segment is added to via ``add_to``.
    """
    # The scale is inverted before the lookup: alpha=1 maps to linear(0).
    stroke = linear(1 - alpha)
    label = f"{id}: :{size}: {alpha}: {stroke}: {speed}"

    segment = folium.PolyLine(
        [point_a, point_b],
        color=stroke,
        weight=2.5,
        opacity=1,
        tooltip=label,
        line_cap="butt",
    )
    segment.add_to(map)


# Centre the map on the mean of both endpoint columns across the merged data.
center_lat = (traffic_data["POINT_1_LAT"].mean() +
              traffic_data["POINT_2_LAT"].mean()) / 2
center_lng = (traffic_data["POINT_1_LNG"].mean() +
              traffic_data["POINT_2_LNG"].mean()) / 2

# Snapshot of all rows recorded at one timestamp.
dv_sample = traffic_data[traffic_data["timestamp"] == '2014-09-04T05:30:00']

# Normalise vehicle counts by the snapshot's peak. The peak is computed once
# instead of once per row, and a zero peak yields 0.0 ratios rather than the
# NaN that 0/0 would hand to the colormap.
vehicle_counts = dv_sample["vehicle_count"]
peak_count = vehicle_counts.max()
if peak_count > 0:
    load_ratio = vehicle_counts / peak_count
else:
    load_ratio = vehicle_counts * 0.0

# One entry per row: [point_1, point_2, count, ratio, extID, NDT_IN_KMH],
# in the positional order that add_lines expects.
lines = [
    [[lat_1, lng_1], [lat_2, lng_2], count, ratio, ext_id, speed]
    for lat_1, lng_1, lat_2, lng_2, count, ratio, ext_id, speed in zip(
        dv_sample["POINT_1_LAT"],
        dv_sample["POINT_1_LNG"],
        dv_sample["POINT_2_LAT"],
        dv_sample["POINT_2_LNG"],
        vehicle_counts,
        load_ratio,
        dv_sample["extID"],
        dv_sample["NDT_IN_KMH"],
    )
]

m = folium.Map(location=[center_lat, center_lng], zoom_start=12)

for line in lines:
    add_lines(*line, m)

# Last expression of the cell: renders the map as the cell output.
m

In [None]:
# Display the colour scale converted to 6 discrete steps.
linear.to_step(6)