# Sort and attribute trips in correct time and space order

TODO: pull out the joint and at-work trips before sorting, then put them back in after sorting.

# Input and output filenames

In [None]:
# Both paths are relative to the directory the notebook is run from.
# pipeline_filename: HDF5 pipeline store the tables are read from.
# output_trip_filename: CSV file the sorted trips are written to.
pipeline_filename = "output/pipeline.h5"
output_trip_filename = "output/final_trips_time_space_id.csv"

# Libraries

In [None]:
import pandas as pd
import numpy as np
import itertools

# Read tables directly from the pipeline

In [None]:
# Get the tables as they stand after the 'trip_mode_choice' step
# (if run as mp then trip_mode_choice is the final state of the tables).
#
# The store is opened read-only, so a missing file raises instead of being
# created empty, and the context manager closes the HDF5 file handle as soon
# as the three tables have been loaded into memory.
with pd.HDFStore(pipeline_filename, mode="r") as pipeline:
    tours = pipeline['/tours/trip_mode_choice']
    trips = pipeline['/trips/trip_mode_choice']
    jtp = pipeline['/joint_tour_participants/trip_mode_choice']

# Add related fields

In [None]:
# Attach tour-level attributes to every trip. Series.map looks values up by
# the mapper's index, so each lookup below is keyed on the index of `tours`.
trips["tour_participants"] = trips["tour_id"].map(tours["number_of_participants"])
# Looking tour_id up in tours.index.to_series() first would only map each id
# to itself, so parent_tour_id is looked up directly.
trips["parent_tour_id"] = trips["tour_id"].map(tours["parent_tour_id"])
trips["tour_start"] = trips["tour_id"].map(tours["start"])

# Start/end of the parent tour; NaN for trips whose tour has no parent.
trips["parent_tour_start"] = trips["parent_tour_id"].map(tours["start"])
trips["parent_tour_end"] = trips["parent_tour_id"].map(tours["end"])

# Create additional trip records for the other persons on joint trips

In [None]:
# One space-separated string of participant ids per joint tour, indexed by
# tour_id. Aggregating the person_id column (instead of groupby.apply with a
# lambda that builds a Series per group) is intended to still give a frame
# with a "person_ids" column when jtp has no rows, so the join below keeps
# working for runs without joint tours.
tour_person_ids = (
    jtp.groupby("tour_id")["person_id"]
    .agg(lambda ids: " ".join(ids.astype("str")))
    .to_frame("person_ids")
)
trips = trips.join(tour_person_ids, on="tour_id")

# Trips whose tour has no participant list fall back to the trip's own person.
trips["person_ids"] = (
    trips["person_ids"]
    .where(trips["person_ids"].notna(), trips["person_id"])
    .astype(str)
)

# Each trip is repeated tour_participants times and then given one id from its
# person_ids list, so the two counts must agree row by row. A mismatch whose
# totals happen to match would otherwise put ids on the wrong trips silently.
ids_per_trip = trips["person_ids"].str.count(" ") + 1
if (ids_per_trip != trips["tour_participants"]).any():
    raise ValueError(
        "tour_participants does not match the number of ids in person_ids "
        "for one or more trips"
    )

# Flat list with one id per (trip, participant), in trip order.
# NOTE(review): the ids are strings from here on, so person_id in
# trips_expanded is a string column and sorts lexicographically - confirm
# that this is intended.
person_ids = list(
    itertools.chain.from_iterable(ids.split(" ") for ids in trips["person_ids"])
)

trips_expanded = trips.loc[np.repeat(trips.index, trips["tour_participants"])]
trips_expanded["person_id"] = person_ids

# sort records

In [None]:
# Order the trips by person, tour start, tour, direction and trip number.
# "inbound" is the negation of "outbound"; ascending order puts False first,
# so a tour's outbound trips come before its inbound trips.
# sort_id records each row's position in that order (0, 1, 2, ...).
trips_expanded = (
    trips_expanded
    .assign(inbound=~trips_expanded["outbound"])
    .sort_values(by=["person_id", "tour_start", "tour_id", "inbound", "trip_num"])
    .assign(sort_id=lambda frame: range(len(frame)))
)

# join atwork trips to parent tour trips and find trip before atwork tour

In [None]:
# Rows with a parent_tour_id are the at-work (subtour) trips.
is_atwork = trips_expanded["parent_tour_id"].notnull()

# First trip, in the current sort order, of every at-work tour.
first_atwork_trip = trips_expanded[is_atwork].drop_duplicates("tour_id", keep="first")

# Pair every trip of a parent tour (columns suffixed _x) with the first trip
# of each of its at-work tours (columns suffixed _y), and flag the parent
# trips that depart no later than that first at-work trip.
parent_tour_trips_with_atwork_trips = trips_expanded.merge(first_atwork_trip, left_on="tour_id", right_on="parent_tour_id")
parent_tour_trips_with_atwork_trips["atwork_depart_before"] = parent_tour_trips_with_atwork_trips.eval("depart_y >= depart_x")

# For each parent tour keep the last flagged trip: the at-work trips are
# slotted in directly after it.
# NOTE(review): the de-duplication is keyed on the parent tour id, so several
# at-work tours of one parent tour all share the same anchor trip - confirm.
atwork_tour_earlier_trip_id = parent_tour_trips_with_atwork_trips[parent_tour_trips_with_atwork_trips["atwork_depart_before"]][["tour_id_x","sort_id_x"]]
atwork_tour_earlier_trip_id = atwork_tour_earlier_trip_id.drop_duplicates("tour_id_x", keep="last")
atwork_tour_earlier_trip_id = atwork_tour_earlier_trip_id.set_index("tour_id_x")

# Ordinary trips are keyed by their own sort_id (as float, copied so that
# sort_id itself is untouched).
parent_tour_sort_id = trips_expanded["sort_id"].to_numpy(dtype="float64", copy=True)

if is_atwork.any():
    # sort_id of the anchor trip for every at-work trip. The .loc lookup
    # raises KeyError when a parent tour has no flagged trip.
    anchor_sort_id = atwork_tour_earlier_trip_id.loc[trips_expanded.loc[is_atwork, "parent_tour_id"]].sort_id_x.to_numpy(dtype="float64")

    # 0-based position of each at-work trip among the trips sharing its anchor.
    rank_in_anchor = pd.Series(anchor_sort_id).groupby(anchor_sort_id).cumcount().to_numpy()
    largest_group = int(rank_in_anchor.max()) + 1

    # Every at-work trip must get a distinct key strictly between its anchor's
    # sort_id and the next sort_id. Steps of 0.1 allow nine trips per anchor;
    # larger groups use a smaller step that still stays below +1.
    step = 0.1 if largest_group <= 9 else 1.0 / (largest_group + 1)

    # Add the step once per position by repeated addition, so the first four
    # trips behind an anchor get the same values as four fixed passes of +0.1
    # would give; a fifth or later trip now gets its own key instead of a tie.
    atwork_sort_id = anchor_sort_id.copy()
    for position in range(largest_group):
        atwork_sort_id[rank_in_anchor >= position] += step

    parent_tour_sort_id[is_atwork.to_numpy()] = atwork_sort_id

trips_expanded["parent_tour_sort_id"] = parent_tour_sort_id

# sort records (stable sort, so any remaining tie keeps its current order)
trips_expanded = trips_expanded.sort_values(['parent_tour_sort_id'], kind="mergesort")

# write trips

In [None]:
# Debugging aids - uncomment one to write a subset of the trips to test.csv:
# either a single household, or every household whose id appears in a 1%
# sample of the trip rows.
# trips_expanded[trips_expanded.household_id.isin(["1107250"])].to_csv("test.csv")
# trips_expanded[trips_expanded.household_id.isin(trips_expanded.household_id.sample(frac=0.01))].to_csv("test.csv")

# Write the sorted trips, including the index, to the output file.
trips_expanded.to_csv(path_or_buf=output_trip_filename, index=True)