In [None]:
import os
import pandas as pd
import numpy as np

In [None]:
# --- Configuration -----------------------------------------------------------
# Directories are given relative to the notebook's working directory.
path_to_data = "raw_data"      # folder holding the raw input files
path_to_output = "data"        # folder that receives the per-date CSV exports
file_name = "camera.csv"       # raw camera capture log read from path_to_data

# Columns of the raw log that this notebook works with.
list_columns_to_keep = [
    "dev_id",
    "dev_name",
    "way_id",
    "car_num",
    "cap_date",
]

# Camera device ids whose records are kept (order is irrelevant for filtering).
list_dev_id_to_select = [
    2148469, 2148468, 2148472, 2148470, 2148471,
    2148740, 2148742, 2148741, 2148737, 2148739,
    2148575, 2148572, 2148577,
    2148532, 2148571, 2148574, 2148573,
]

# Capture-time window; the filter uses start_ts inclusive and end_ts exclusive.
start_ts = pd.Timestamp("2018-04-01 00:00:00")
end_ts = pd.Timestamp("2018-05-01 00:00:00")

In [None]:
def parse_inter_name(name):
    """Return the part of a device name that precedes its direction marker.

    The markers are tried in the order listed below (not by position in the
    string); for the first marker contained in ``name``, the text before its
    first occurrence is returned.

    Args:
        name: Device name to parse. Non-string values (e.g. ``None`` or the
            NaN pandas uses for a missing cell) are treated as unparseable.

    Returns:
        The prefix of ``name`` before the matched marker, or the fallback
        label "非地面路口" when ``name`` is not a string or has no marker.
    """
    if not isinstance(name, str):
        # `marker in name` would raise TypeError on NaN/None.
        return "非地面路口"

    for direction_name in ("东口", "西口", "南口", "北口", "东向西", "西向东", "南向北", "北向南"):
        # partition() splits at the first occurrence; an empty separator
        # slot means the marker is absent.
        prefix, found, _ = name.partition(direction_name)
        if found:
            return prefix

    return "非地面路口"
    
def parse_entrance_name(name):
    """Return the direction marker contained in a device name.

    The markers are tried in the order listed below; the first one that occurs
    anywhere in ``name`` is returned.

    Args:
        name: Device name to parse. Non-string values (e.g. ``None`` or the
            NaN pandas uses for a missing cell) are treated as unparseable.

    Returns:
        The matched marker string, or the fallback label "非地面路口" when
        ``name`` is not a string or contains none of the markers.
    """
    if not isinstance(name, str):
        # `marker in name` would raise TypeError on NaN/None.
        return "非地面路口"

    for direction_name in ("东口", "西口", "南口", "北口", "东向西", "西向东", "南向北", "北向南"):
        if direction_name in name:
            return direction_name

    return "非地面路口"

In [None]:
# Load the raw, tab-separated camera capture log.
#   usecols - read only the columns listed in list_columns_to_keep; these are
#             the only raw columns this notebook uses.
#   dtype   - force car_num to str. The notebook later compares it with the
#             string '0'; leaving it to chunked type inference can yield
#             numeric values that such a comparison never matches.
df_data = pd.read_csv(
    os.path.join(path_to_data, file_name),
    delimiter="\t",
    usecols=list_columns_to_keep,
    dtype={"car_num": str},
)
df_data["cap_date"] = pd.to_datetime(df_data["cap_date"])
# separate date and time
df_data["date"] = df_data["cap_date"].dt.date
df_data["timestamp"] = df_data["cap_date"].dt.time
# separate intersection and entrance (both parsed from the device name)
df_data["inter_name"] = df_data["dev_name"].apply(parse_inter_name)
df_data["entrance"] = df_data["dev_name"].apply(parse_entrance_name)

In [None]:
# Keep a record only if it passes all three filters:
#   - car_num is not the placeholder string '0' (cars with no number)
#   - dev_id is one of the selected devices
#   - cap_date lies in [start_ts, end_ts)
mask_has_car_num = df_data["car_num"] != '0'
mask_selected_dev = df_data["dev_id"].isin(list_dev_id_to_select)
mask_in_window = (df_data["cap_date"] >= start_ts) & (df_data["cap_date"] < end_ts)

# Take an explicit copy: later cells assign columns on df_data, and doing that
# on a frame derived by boolean indexing triggers SettingWithCopyWarning.
df_data = df_data[mask_has_car_num & mask_selected_dev & mask_in_window].copy()

In [None]:
# --- encode inter_name -------------------------------------------------------
# Lookup table: intersection name as parsed from the device name -> scats_id.
df_code_inter_name = pd.read_csv(os.path.join(path_to_data, "list_intersections_disambiguition.csv"))
dic_code_inter_name = dict(
    zip(df_code_inter_name["inter_name_kakou"], df_code_inter_name["scats_id"])
)
# Plain dict indexing on purpose: an intersection name missing from the lookup
# raises KeyError instead of being silently turned into NaN.
df_data["inter_name"] = df_data["inter_name"].map(dic_code_inter_name.__getitem__)

# --- encode car_num ----------------------------------------------------------
# NOTE(review): no encoding is applied to car_num in this cell.

# --- encode entrance ---------------------------------------------------------
# Direction marker found in the device name -> compass label.
dic_code_entrance = {
    "东口": "east",
    "西口": "west",
    "南口": "south",
    "北口": "north",
    "东向西": "east",
    "西向东": "west",
    "南向北": "south",
    "北向南": "north",
}
df_data["entrance"] = df_data["entrance"].map(dic_code_entrance.__getitem__)

# --- encode way id -----------------------------------------------------------
# Numeric way_id -> movement code (l = left turn, s = straight, r = right turn).
dic_code_way_id = {
    1: "s",    # straight
    2: "l",    # left turn
    3: "r",    # right turn
    4: "ls",   # left turn + straight
    5: "sr",   # right turn + straight
    6: "lr",   # left turn + right turn
    7: "lsr",  # left turn + straight + right turn
}
df_data["way_id"] = df_data["way_id"].map(dic_code_way_id.__getitem__)


In [None]:
# Reduce the frame to the exported fields, in export order. The capture
# datetime is carried as the separate "date" and "timestamp" columns.
df_data = df_data.loc[
    :,
    ["inter_name", "entrance", "way_id", "car_num", "date", "timestamp"],
]

In [None]:
# Write one CSV per calendar date, named "camera-<date>.csv", into path_to_output.
# Create the output folder first: to_csv does not create missing directories.
os.makedirs(path_to_output, exist_ok=True)

list_unique_dates = np.unique(df_data.date.values)
for unique_date in list_unique_dates:
    # sort_values returns a new frame; sorting a slice of df_data in place
    # would raise SettingWithCopyWarning.
    df_data_sub = df_data[df_data["date"] == unique_date].sort_values(
        by=["timestamp", "inter_name", "entrance"]
    )
    df_data_sub.to_csv(os.path.join(path_to_output, "camera-{0}.csv".format(unique_date)))

In [None]:
# extract records by car

In [None]:
# extract records by second

In [None]:
# calculate num_of_captured, cars distribution

In [None]:
# Bare last expression: the notebook renders df_data as this cell's output.
df_data