In [4]:
import pandas as pd
import numpy as np
import os

In [5]:
# Directory the notebook is run from; raw inputs are read from <root_dir>/data.
root_dir = os.getcwd()

# Raw call-record CSVs to preprocess (names suggest one file per
# density / building-height scenario).
files = [
    "low_dense_low_rise_call_records.csv",
    "high_dense_low_rise_call_records.csv",
    "low_dense_high_rise_call_records.csv",
    "high_dense_high_rise_call_records.csv",
]
def _build_call_grid(df):
    """Expand raw call events into a per-second grid of call indicator flags.

    Parameters
    ----------
    df : pandas.DataFrame
        Call events with a datetime ``Timestamp`` column, an integer ``Floor``
        column and a ``Direction`` column in which ``1`` means up and any
        other value means down.

    Returns
    -------
    pandas.DataFrame
        One row per second from midnight of the earliest call date through
        the last second of the latest call date, indexed by that second.
        Columns are ``timestamp``, ``day`` (weekday number), ``time`` (seconds
        since midnight) followed by ``<floor>_Up`` / ``<floor>_Down`` flags
        that are 1 where a call was registered and 0 otherwise. The lowest
        floor's ``_Down`` and the highest floor's ``_Up`` columns are omitted.

    Raises
    ------
    ValueError
        If ``df`` has no rows (no date or floor range can be derived).
    """
    if df.empty:
        raise ValueError("call record is empty; cannot derive date or floor range")

    seconds_per_day = 24 * 60 * 60
    one_second = pd.Timedelta(seconds=1)

    first_day = df["Timestamp"].min().normalize()
    last_day = df["Timestamp"].max().normalize()
    n_days = (last_day - first_day).days + 1

    min_floor = df["Floor"].min()
    max_floor = df["Floor"].max()

    # Column layout: for each floor in ascending order, "<floor>_Up" then "<floor>_Down".
    columns = [
        f"{floor}{suffix}"
        for floor in range(min_floor, max_floor + 1)
        for suffix in ("_Up", "_Down")
    ]

    # One grid row per second covering every whole day spanned by the data.
    rows = pd.date_range(start=first_day, periods=n_days * seconds_per_day, freq=one_second)

    # Whole seconds since the start of the grid == row position. Floor division
    # keeps sub-second timestamps on the grid instead of creating stray rows.
    row_pos = ((df["Timestamp"] - first_day) // one_second).to_numpy()
    # Position within the layout above: two columns per floor, Up first.
    col_pos = (2 * (df["Floor"] - min_floor) + (df["Direction"] != 1)).to_numpy()

    calls = np.zeros((len(rows), len(columns)), dtype=np.int64)
    calls[row_pos, col_pos] = 1

    call_record = pd.DataFrame(calls, index=rows, columns=columns)
    call_record.insert(0, 'timestamp', rows)
    call_record.insert(1, 'day', rows.weekday)
    call_record.insert(2, 'time', rows.hour * 3600 + rows.minute * 60 + rows.second)

    # The lowest floor cannot be called downwards and the highest floor cannot
    # be called upwards; derive the names from the data rather than assuming
    # floors 0..10.
    return call_record.drop(columns=[f"{min_floor}_Down", f"{max_floor}_Up"])


def preprocess_call_record(root_dir, file):
    """Turn one raw call-record CSV into processed, scaled train/test CSVs.

    Steps:

    1. Read ``<root_dir>/data/<file>`` (columns ``Timestamp``, ``Floor``,
       ``Direction``) and expand it into a per-second grid of call flags.
    2. Write the unscaled grid next to the input, with ``call_records.csv`` in
       the file name replaced by ``call_record_processed.csv``.
    3. Convert ``timestamp`` to an integer, standard-scale it, and min-max
       scale ``day`` and ``time``. The fitted scalers are pickled to
       ``<parent of root_dir>/<scenario>/scaler/``, where ``<scenario>`` is
       ``file`` without the ``_call_records.csv`` suffix.
    4. Split the scaled grid chronologically 80/20 and write
       ``*_trainset.csv`` / ``*_testset.csv`` to ``<parent of root_dir>/<scenario>/``.

    Parameters
    ----------
    root_dir : str
        Directory containing the ``data`` folder with the raw CSV.
    file : str
        File name of the raw call-record CSV inside ``<root_dir>/data``.

    Returns
    -------
    None
        All results are written to disk.

    Raises
    ------
    ValueError
        If the input CSV contains no rows.
    """
    data_dir = os.path.join(root_dir, 'data')
    df_path = os.path.join(data_dir, file)
    df = pd.read_csv(df_path)
    df["Timestamp"] = pd.to_datetime(df["Timestamp"])

    call_record = _build_call_grid(df)

    output_path = df_path.replace('call_records.csv', 'call_record_processed.csv')
    print(f"Saving processed call record to {output_path}")
    call_record.to_csv(output_path, index=False)

    # pd timestamp to integer epoch value
    # NOTE(review): the original comment says "unix timestamp", but `// 10` does
    # not convert to seconds (that would presumably be `// 10**9`). Left
    # unchanged because the pickled scaler is fitted on this unit and any
    # consumer must apply the same conversion -- confirm before changing.
    call_record['timestamp'] = call_record['timestamp'].astype(np.int64) // 10

    # scale timestamp, day, time
    # Imported here, after the processed CSV is written, as in the original cell.
    from sklearn.preprocessing import MinMaxScaler, StandardScaler
    scaler_timestamp = StandardScaler()
    scaler_day = MinMaxScaler()
    scaler_time = MinMaxScaler()

    call_record['timestamp'] = scaler_timestamp.fit_transform(call_record[['timestamp']])
    call_record['day'] = scaler_day.fit_transform(call_record[['day']])
    call_record['time'] = scaler_time.fit_transform(call_record[['time']])

    train_dir = os.path.join(os.path.dirname(root_dir), file.replace('_call_records.csv', ''))
    scaler_dir = os.path.join(train_dir, 'scaler')
    # Create <scenario>/ and <scenario>/scaler/ so the writes below cannot fail
    # on a fresh checkout.
    os.makedirs(scaler_dir, exist_ok=True)

    # save scalers as pkl files
    import joblib
    scaler_time_dir = os.path.join(scaler_dir, 'scaler_time.pkl')
    scaler_day_dir = os.path.join(scaler_dir, 'scaler_day.pkl')
    scaler_timestamp_dir = os.path.join(scaler_dir, 'scaler_timestamp.pkl')
    print(f"Saving scalers to {scaler_time_dir}, {scaler_day_dir}, {scaler_timestamp_dir}")
    joblib.dump(scaler_timestamp, scaler_timestamp_dir)
    joblib.dump(scaler_day, scaler_day_dir)
    joblib.dump(scaler_time, scaler_time_dir)

    # Chronological split: the first 80% of seconds train, the remainder test.
    split_at = int(len(call_record) * 0.8)
    trainset = call_record.iloc[:split_at]
    testset = call_record.iloc[split_at:]
    trainset_dir = os.path.join(train_dir, file.replace('call_records.csv', 'trainset.csv'))
    testset_dir = os.path.join(train_dir, file.replace('call_records.csv', 'testset.csv'))
    print(f"Saving trainset to {trainset_dir} and testset to {testset_dir}")
    trainset.to_csv(trainset_dir, index=False)
    testset.to_csv(testset_dir, index=False)

In [6]:
# Preprocess every configured call-record file; each call writes its
# processed CSV, fitted scalers and train/test split to disk.
for file in files:
    preprocess_call_record(root_dir=root_dir, file=file)

Saving processed call record to e:\iCloudDrive\master of applied computing\capstone project\arrival_simulator\data\low_dense_low_rise_call_record_processed.csv
Saving scalers to e:\iCloudDrive\master of applied computing\capstone project\low_dense_low_rise\scaler\scaler_time.pkl, e:\iCloudDrive\master of applied computing\capstone project\low_dense_low_rise\scaler\scaler_day.pkl, e:\iCloudDrive\master of applied computing\capstone project\low_dense_low_rise\scaler\scaler_timestamp.pkl
Saving trainset to e:\iCloudDrive\master of applied computing\capstone project\low_dense_low_rise\low_dense_low_rise_trainset.csv and testset to e:\iCloudDrive\master of applied computing\capstone project\low_dense_low_rise\low_dense_low_rise_testset.csv
Saving processed call record to e:\iCloudDrive\master of applied computing\capstone project\arrival_simulator\data\high_dense_low_rise_call_record_processed.csv
Saving scalers to e:\iCloudDrive\master of applied computing\capstone project\high_dense_low_r